Upload needed packages

library(plyr)
library(tidyverse)
library(DESeq2)
library(dplyr)
library(ggplot2)

Part A: Preparing count matrices and dataframe for binomial test

Step A-01: Read the data/table

counts_2ms01e <- read.table(
  "final_counts_2ms01e.txt", sep="\t", header = TRUE)
counts_2ms02g <- read.table(
  "final_counts_2ms02g.txt", sep="\t", header = TRUE)
counts_2ms03g <- read.table(
  "final_counts_2ms03g.txt", sep="\t", header = TRUE)
counts_2ms04h <- read.table(
  "final_counts_2ms04h.txt", sep="\t", header = TRUE)

Step A-02: remove duplicate rows, merge the table, select rows with enough counts

removing duplicates if they have same start position and transcript_ID

counts_2ms01e.DeDup <- counts_2ms01e %>%
  distinct(start, transcript_ID, .keep_all = TRUE)
counts_2ms02g.DeDup <- counts_2ms02g %>%
  distinct(start, transcript_ID, .keep_all = TRUE)
counts_2ms03g.DeDup <- counts_2ms03g %>%
  distinct(start, transcript_ID, .keep_all = TRUE)
counts_2ms04h.DeDup <- counts_2ms04h %>%
  distinct(start, transcript_ID, .keep_all = TRUE)

count the number of rows again

nrow(counts_2ms01e.DeDup)
## [1] 3017
nrow(counts_2ms02g.DeDup)
## [1] 3017
nrow(counts_2ms03g.DeDup)
## [1] 3017
nrow(counts_2ms04h.DeDup)
## [1] 3017

Step A-03: further filtering and renaming

I. select required columns II. rename columns - to preserve column-sample relationship

A-03 I: Select only required columns now:

contig, start, end, geneName, geneID, transcriptID, unqC_My, unqC_Sp, mulC_My, mulC_Sp, totalC_My, totalC_Sp

counts_2ms01e.selected <- select(
  counts_2ms01e.DeDup, contig, 
  start, end, gene_Name, gene_ID, 
  transcript_ID, 
  matches("unqC|mulC|totalC", ignore.case = TRUE))

counts_2ms02g.selected <- select(
  counts_2ms02g.DeDup, contig, 
  start, transcript_ID,
  matches("unqC|mulC|totalC", ignore.case = TRUE))

counts_2ms03g.selected <- select(
  counts_2ms03g.DeDup, contig, 
  start, transcript_ID,
  matches("unqC|mulC|totalC", ignore.case = TRUE))

counts_2ms04h.selected <- select(
  counts_2ms04h.DeDup, contig, 
  start, transcript_ID,
  matches("unqC|mulC|totalC", ignore.case = TRUE))

A-03 II: Rename some columns to retain sample-data relationship when merging

counts_2ms01e.Renamed <- plyr::rename(
  counts_2ms01e.selected, 
  c(unqC_My = "unqC_My.ms01e", unqC_Sp = "unqC_Sp.ms01e",
    mulC_My = "mulC_My.ms01e", mulC_Sp = "mulC_Sp.ms01e",
    totalC_My = "totalC_My.ms01e", totalC_Sp = "totalC_Sp.ms01e"))

counts_2ms02g.Renamed <- plyr::rename(
  counts_2ms02g.selected, 
  c(unqC_My = "unqC_My.ms02g", unqC_Sp = "unqC_Sp.ms02g",
    mulC_My = "mulC_My.ms02g", mulC_Sp = "mulC_Sp.ms02g",
    totalC_My = "totalC_My.ms02g", totalC_Sp = "totalC_Sp.ms02g"))

counts_2ms03g.Renamed <- plyr::rename(
  counts_2ms03g.selected, 
  c(unqC_My = "unqC_My.ms03g", unqC_Sp = "unqC_Sp.ms03g",
    mulC_My = "mulC_My.ms03g", mulC_Sp = "mulC_Sp.ms03g",
    totalC_My = "totalC_My.ms03g", totalC_Sp = "totalC_Sp.ms03g"))

counts_2ms04h.Renamed <- plyr::rename(
  counts_2ms04h.selected, 
  c(unqC_My = "unqC_My.ms04h", unqC_Sp = "unqC_Sp.ms04h",
    mulC_My = "mulC_My.ms04h", mulC_Sp = "mulC_Sp.ms04h",
    totalC_My = "totalC_My.ms04h", totalC_Sp = "totalC_Sp.ms04h"))

Step A-04: merge/join different dataframes

counts.merged.ms1e2g3g4h <- merge(
  counts_2ms01e.Renamed, counts_2ms02g.Renamed,
  by=c("contig", "start", "transcript_ID")) %>% 
  merge(counts_2ms03g.Renamed,
        by=c("contig", "start", "transcript_ID")) %>%
  merge(counts_2ms04h.Renamed,
        by=c("contig", "start", "transcript_ID"))
  # Note: This will merge the dataframes by matching the names of the contig, start, trans_ID

Step A-05: Further filtering

Take the merged data and select only genes/transcripts ..

I. ..with sufficient coverage (above 10 unique counts in at least one haplotype of 4 samples i.e in one unqC columns out of 8 cols)

  1. ..with sum(mulC) counts < 10% of sum(totalC)

A-05: I). Select rows with good coverage (above 10 counts in at least one haplotype)

counts.ms1e2g3g4h.Abv10Counts <- filter(
  counts.merged.ms1e2g3g4h, 
  unqC_My.ms01e>10 |unqC_Sp.ms01e>10 |unqC_My.ms02g>10 |unqC_Sp.ms02g>10 
  |unqC_My.ms03g>10 |unqC_Sp.ms03g>10 |unqC_My.ms04h>10 |unqC_Sp.ms04h>10)

A-05: II). select row where mulC reads are less than 20 % of totalCount reads or unqC Reads > 20% totalReads

# First sum the unqC, mulC and totalC
counts.ms1e2g3g4h.Summed <- cbind(
  counts.ms1e2g3g4h.Abv10Counts, 
  unqC_total = counts.ms1e2g3g4h.Abv10Counts %>%
    select(matches("unqC")) %>%
    rowSums(),
  mulC_total = 
    counts.ms1e2g3g4h.Abv10Counts %>% 
    select(matches("mulC")) %>%
    rowSums(), 
  totalC_total = counts.ms1e2g3g4h.Abv10Counts %>%
    select(matches("totalC")) %>%
    rowSums())

# Now, Select rows where the mulC reads < 20% of totalReads or unqC Reads > 20% totalReads

counts.ms1e2g3g4h.filt01 <- filter(
  counts.ms1e2g3g4h.Summed, 
  mulC_total < .20*totalC_total | 
    unqC_total > .20*totalC_total) %>% 
  # remove the totaled column
  select(-unqC_total, -mulC_total, -totalC_total) %>%
  # then sort by contig and start position
  arrange(start, contig)

Step-A-06: Prepare dataframe for binomial test analysis

I). Make a vertical dataframe with unqC reads for My and Sp alleles as separate columns and all of the samples together II). Apply additional filter (unqC_Sum > 16)

Step A-06 I). Dataframe for binomial test

counts.vert.filt01 <- counts.ms1e2g3g4h.filt01 %>% 
  select(c(gene_ID, transcript_ID, starts_with("unqc"))) %>%
  pivot_longer(-c(gene_ID, transcript_ID)) %>%
  mutate(sample = paste0("2",str_sub(name, -5, -1)),
         allele = str_sub(name, 1, 7)) %>%
  select(-name) %>%
  pivot_wider(names_from = allele, values_from = value)

Step A-06 II). Additional filter

To avoid situations with (0,0) counts and leave some significantly

counts.ForBinom <- counts.vert.filt01 %>%
  mutate(unqC_Sum = unqC_My + unqC_Sp) %>%
  filter(unqC_Sum >= 16)

Part B: Prepare DESeqDataSet Object, sample Info and the design formula

Step B-01: DESeqDataSet Count Object

Import the expression data and convert it into a matrix

# matrix data only has counts with one rowname column
# > the column "transcript_ID" can be set as rownames
# > select only unique counts (unqC) columns for data analyses
expressionData <- data.frame(
  counts.ms1e2g3g4h.filt01, row.names = "transcript_ID") %>%
  select(matches("unqC", ignore.case = TRUE))

head(expressionData)
##              unqC_My.ms01e unqC_Sp.ms01e unqC_My.ms02g unqC_Sp.ms02g
## AL2G10020.t1          1414          3590             2           724
## AL2G10030.t1          1470           940           562           476
## AL2G10050.t1           212           390            30             0
## AL2G10060.t1            70            70             8             4
## AL2G10070.t1            26            42             0             0
## AL2G10080.t1            64            80            42            32
##              unqC_My.ms03g unqC_Sp.ms03g unqC_My.ms04h unqC_Sp.ms04h
## AL2G10020.t1           752          2764          1806          2934
## AL2G10030.t1          1856           948          1538           928
## AL2G10050.t1           206           212           134            98
## AL2G10060.t1            50            50           114            34
## AL2G10070.t1            34            38             8            42
## AL2G10080.t1            58            70            24            12

Step B-02: Sample Info

  • design/set covariate factors for the test i.e the experimental condition, family, control/treatment group .. # .. each sample belongs to

*create a dataframe detailing the above co-variate:sample relationship

e.g: # individual <- factor(c(1,1,2,2,3,3,4,4)) # OR

  # sample level variates
sampleID <- factor(rep(c("ms01e", "ms02g", "ms03g", "ms04h"), each = 2))
sampleID
## [1] ms01e ms01e ms02g ms02g ms03g ms03g ms04h ms04h
## Levels: ms01e ms02g ms03g ms04h
  # haplotype level variates
hapType <- factor(rep(c("My", "Sp"), 4))
hapType
## [1] My Sp My Sp My Sp My Sp
## Levels: My Sp
  # maternal cytoplasm level variates
cytoplasm = factor(rep("Ma", 8))
cytoplasm
## [1] Ma Ma Ma Ma Ma Ma Ma Ma
## Levels: Ma
  # family level variates
familyGroup <- factor(rep(c("e", "g", "g", "h"), each=2))
familyGroup
## [1] e e g g g g h h
## Levels: e g h

Step B-03: create a dataframe detailing the above co-variate:sample relationship

exp_factors = data.frame(sampleID = sampleID, 
                         hapType = hapType,
                         familyGroup = familyGroup, 
                         maternalEff = cytoplasm)
head(exp_factors)
##   sampleID hapType familyGroup maternalEff
## 1    ms01e      My           e          Ma
## 2    ms01e      Sp           e          Ma
## 3    ms02g      My           g          Ma
## 4    ms02g      Sp           g          Ma
## 5    ms03g      My           g          Ma
## 6    ms03g      Sp           g          Ma

Step B-04: Design formula (experimental design set up for ASE)

  • make a design to compare between two haplotypes, after accounting for other variates
  • this design can be directly added to DESeq2 algorithm

**Optional: if our interest is which genes show ASE differences across different sample IDs (or group) our experimental design would be: expDesign <- model.matrix(~0 + sampleID + hapType + sampleID:hapType)

expDesign <- model.matrix(~0 + sampleID + hapType)
expDesign
##   sampleIDms01e sampleIDms02g sampleIDms03g sampleIDms04h hapTypeSp
## 1             1             0             0             0         0
## 2             1             0             0             0         1
## 3             0             1             0             0         0
## 4             0             1             0             0         1
## 5             0             0             1             0         0
## 6             0             0             1             0         1
## 7             0             0             0             1         0
## 8             0             0             0             1         1
## attr(,"assign")
## [1] 1 1 1 1 2
## attr(,"contrasts")
## attr(,"contrasts")$sampleID
## [1] "contr.treatment"
## 
## attr(,"contrasts")$hapType
## [1] "contr.treatment"
colnames(expDesign)
## [1] "sampleIDms01e" "sampleIDms02g" "sampleIDms03g" "sampleIDms04h"
## [5] "hapTypeSp"

Step B-05: Provide count data as matrix, colData and design

colData (explains the co-variates:sample relationship)

ASE_Matrix <- DESeqDataSetFromMatrix(countData = expressionData, 
                                      colData = exp_factors, 
                                      design = ~ 0 + sampleID + hapType)

ASE_Matrix
## class: DESeqDataSet 
## dim: 1761 8 
## metadata(1): version
## assays(1): counts
## rownames(1761): AL2G10020.t1 AL2G10030.t1 ... AL2G41810.t1 AL2G41820.t1
## rowData names(0):
## colnames(8): unqC_My.ms01e unqC_Sp.ms01e ... unqC_My.ms04h
##   unqC_Sp.ms04h
## colData names(4): sampleID hapType familyGroup maternalEff
colnames(ASE_Matrix)
## [1] "unqC_My.ms01e" "unqC_Sp.ms01e" "unqC_My.ms02g" "unqC_Sp.ms02g"
## [5] "unqC_My.ms03g" "unqC_Sp.ms03g" "unqC_My.ms04h" "unqC_Sp.ms04h"

Step B-06: Set normalization factors for all columns to 1 (one) => ..

.. no normalization for any variabilty betweens samples (this is done so that any library size differences won’t absorb the ASE)

sizeFactors(ASE_Matrix) <- rep(1, 2*4)

Part C: Exploratory analyses, Diagnostic plots and visualization

There are two separate paths in this workflow; the one we will see first involves transformations of the counts in order to visually explore sample relationships. In the second part, we will go back to the original raw counts for statistical testing. This is critical because the statistical testing methods rely on original count data (not scaled or transformed) for calculating the precision of measurements.

Details on part-D: * Exploratory plots can be applied on raw counts to see how the data looks. This can help reveal if distribution of the data is as expected. * Plot like PCA, dispersion, etc. * Depending upon the data the raw data can be transformed as need be to improve exploration/diagnosis

This plot are made using: classic Plot, ggplot2 and functions within DESeq2 Raw counts, transformed raw counts, results are used to prepare this plots

Step C-01: Work with rlog-transformed data. # Why transform the data? - See

Step C-01 I:

Link: https://www.bioconductor.org/help/course-materials/2016/CSAMA/lab-3-rnaseq/rnaseq_gene_CSAMA2016.html#transformations Link: https://www.bioconductor.org/help/workflows/rnaseqGene/#the-rlog-and-variance-stabilizing-transformations

While the ASE test is done using “DESeq” function plotting of the raw data may cause some problems. Thus it is important to transform the data to make it more homoskedastic and appr. for showing in plots. We are using the rlog transformation since is it appr. for small sample (n<100)

rld.ASE_Matrix <- rlog(ASE_Matrix, blind = TRUE)  # using rlog
vsd.ASE_Matrix <- vst(ASE_Matrix, blind = TRUE)  # variance stabilizing transformation

Step C-01 II :

Plot the first Haplotype against second haplotype for log2 transformation (simply add 1, to avoid log of zero).

Prepare Dataset for plots

df.toPlot <- bind_rows(
  as_data_frame(log2(counts(ASE_Matrix, normalized=TRUE)[, 1:2]+1)) %>%
    mutate(transformation = "log2(x + 1)"),
  as_data_frame(assay(rld.ASE_Matrix)[, 1:2]) %>% mutate(transformation = "rlog"),
  as_data_frame(assay(vsd.ASE_Matrix)[, 1:2]) %>% mutate(transformation = "vst"))
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
colnames(df.toPlot)[1:2] <- c("My", "Sp")

plot the effects of the different transformation (log2, rlog and vst)

df.toPlot <- bind_rows(
  as_data_frame(log2(counts(ASE_Matrix, normalized=TRUE)[, 1:2]+1)) %>%
    mutate(transformation = "log2(x + 1)"),
  as_data_frame(assay(rld.ASE_Matrix)[, 1:2]) %>% mutate(transformation = "rlog"),
  as_data_frame(assay(vsd.ASE_Matrix)[, 1:2]) %>% mutate(transformation = "vst"))

colnames(df.toPlot)[1:2] <- c("My", "Sp")
ggplot(df.toPlot, aes(x = My, y = Sp)) + geom_hex(bins = 80) +
  coord_fixed() + facet_grid( . ~ transformation) +
  ggtitle("Comparison of different transformation of raw RNAseq counts") + 
  theme(plot.title = element_text(size = 10, face = "bold"))
## Warning: Computation failed in `stat_binhex()`:
## 
## Computation failed in `stat_binhex()`:
## 
## Computation failed in `stat_binhex()`:

ggplot(df.toPlot) +
  geom_density(aes(x = My), fill = "blue", alpha = 0.6) +
  geom_density(aes(x = Sp), fill = "light blue", alpha = 0.6) + facet_grid(~ transformation)

ggplot(df.toPlot) +
  geom_histogram(aes(x = My), fill = "blue", alpha = 0.6) +
  geom_histogram(aes(x = Sp), fill = "light blue", alpha = 0.6) + facet_grid(~ transformation)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Result: The rlog transformation obviously looks better

Step C-02: Compare and plot distances between samples

C-02 I : Sample distances- using “rlog” transformed data

Assess overall similarity between samples

Questions: * Are sample similar by haplotypes or are haplotype similar by sample? * Do we have roughly equal contribution from all genes?

Calculate Euclidian distances between samples

sampleDists.ASE_Matrix <- dist(t(assay(rld.ASE_Matrix)))
  # Results: We can see that a Sample-Haplotype is closest to it's own Sample-AltHaplotype
    # then it is more closer to AltSample-Haplotype and then AltSample-AltHaplotype
    # E.g My.ms01e is closest to Sp.ms01e, then to other My haplotype than Sp haplotype
    # My.ms01e vs. Sp.ms01e = 41.85; vs. My.ms02g = 51.021; vs. Sp.ms02g = 62.23
    # is there a way to visualize this sample distances?

plot(hclust(sampleDists.ASE_Matrix))

C-02 II : visualize the sample distances in a heatMap using rlog data

Useful packages

library("gplots" )
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
## 
##     space
## The following object is masked from 'package:S4Vectors':
## 
##     space
## The following object is masked from 'package:stats':
## 
##     lowess
library("RColorBrewer" )
library("pheatmap")
library("PoiClaClu") # For Poisson Distances

colors = colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)

Prepare dataframes for heatmaps with Euclidian Distances and Poisson Distances

# Euclidean Distances
sampleDistMatrix <- as.matrix(sampleDists.ASE_Matrix)
rownames(sampleDistMatrix) <- paste( rld.ASE_Matrix$hapType ,
                                     rld.ASE_Matrix$sampleID, sep="-" )
colnames(sampleDistMatrix) <- NULL

sampleDistMatrix
##              [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]
## My-ms01e  0.00000 41.92941 51.35710 63.35913 48.13836 59.76878 49.75835
## Sp-ms01e 41.92941  0.00000 62.17238 59.62649 58.93902 50.94437 58.62307
## My-ms02g 51.35710 62.17238  0.00000 46.89185 51.70238 62.34089 47.72633
## Sp-ms02g 63.35913 59.62649 46.89185  0.00000 63.02487 58.14543 60.17662
## My-ms03g 48.13836 58.93902 51.70238 63.02487  0.00000 43.69786 47.11232
## Sp-ms03g 59.76878 50.94437 62.34089 58.14543 43.69786  0.00000 57.05899
## My-ms04h 49.75835 58.62307 47.72633 60.17662 47.11232 57.05899  0.00000
## Sp-ms04h 60.79595 53.07824 60.81861 57.91596 59.74291 51.55098 44.03559
##              [,8]
## My-ms01e 60.79595
## Sp-ms01e 53.07824
## My-ms02g 60.81861
## Sp-ms02g 57.91596
## My-ms03g 59.74291
## Sp-ms03g 51.55098
## My-ms04h 44.03559
## Sp-ms04h  0.00000

Standard heatmap (Euclidean Distances)

pheatmap(sampleDistMatrix,
         clustering_distance_rows = sampleDists.ASE_Matrix,
         clustering_distance_cols = sampleDists.ASE_Matrix,
         col = colors)

Prepare Dataframe for ggplot heatmap

sampleDistDF <- as.data.frame(sampleDistMatrix)
colnames(sampleDistDF) <- c("My-ms01e", "Sp-ms01e", "My-ms02g", "Sp-ms02g", 
                                   "My-ms03g", "Sp-ms03g", "My-ms04h", "Sp-ms04h")

# Make columns from rownames
sampleDistDF$sample_1 <- rownames(sampleDistDF)

# Make long-format data frame
sampleDistDF <- sampleDistDF %>% 
  pivot_longer(!sample_1,
               names_to = "sample_2",
               values_to = "dist")

GGplot Heatmap

# GGplot Heatmap Distances
grDevices::png(filename = "htmp_dist.png",  width = 12.5, height = 8, units = 'cm', res = 400)

htmp_dist <- ggplot(sampleDistDF, aes(sample_1, sample_2, fill = dist)) +
  geom_tile() +
  scale_fill_gradient(low = "#457b92", high = "#f5ffff") +
  labs(title = "Heatmap: sample distances") +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.title = element_blank(),
    plot.title = element_text(size = 12),
    axis.text.x = element_text(angle = 45, hjust=1)
  )

htmp_dist

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("htmp_dist.png")

C-02 III : sample distances heatMap using poission

# this function takes original count matrix (not normalized) with samples as rows

poisDists <- PoissonDistance(t(counts(ASE_Matrix)))
samplePoisDistMatrix <- as.matrix(poisDists$dd)

rownames(samplePoisDistMatrix) <- paste(
  rld.ASE_Matrix$hapType, rld.ASE_Matrix$sampleID, sep = "-")

colnames(samplePoisDistMatrix) <- NULL

samplePoisDistMatrix
##               [,1]      [,2]      [,3]      [,4]      [,5]      [,6]     [,7]
## My-ms01e    0.0000  500.7647  697.9457 1062.0223  624.6305  962.4118 649.6450
## Sp-ms01e  500.7647    0.0000 1068.3020  954.9788  931.9396  716.0597 933.0843
## My-ms02g  697.9457 1068.3020    0.0000  630.8040  747.1794 1080.7018 659.1200
## Sp-ms02g 1062.0223  954.9788  630.8040    0.0000 1086.9830  924.2734 997.0650
## My-ms03g  624.6305  931.9396  747.1794 1086.9830    0.0000  529.0677 597.6537
## Sp-ms03g  962.4118  716.0597 1080.7018  924.2734  529.0677    0.0000 877.0273
## My-ms04h  649.6450  933.0843  659.1200  997.0650  597.6537  877.0273   0.0000
## Sp-ms04h  978.3080  742.6169 1020.0877  900.0103  928.6833  690.0129 508.5604
##               [,8]
## My-ms01e  978.3080
## Sp-ms01e  742.6169
## My-ms02g 1020.0877
## Sp-ms02g  900.0103
## My-ms03g  928.6833
## Sp-ms03g  690.0129
## My-ms04h  508.5604
## Sp-ms04h    0.0000

Standard heatmap (Poisson distances)

pheatmap(samplePoisDistMatrix, 
         clustering_distance_cols = poisDists$dd,
         clustering_distance_rows = poisDists$dd,
         col = colors)

Prepare Dataframe for ggplot heatmap

samplePoisDistDF <- as.data.frame(samplePoisDistMatrix)
colnames(samplePoisDistDF) <- c("My-ms01e", "Sp-ms01e", "My-ms02g", "Sp-ms02g", 
                                   "My-ms03g", "Sp-ms03g", "My-ms04h", "Sp-ms04h")

# Make columns frow rownames
samplePoisDistDF$sample_1 <- rownames(samplePoisDistDF)

# Make long-format data frame
samplePoisDistDF <- samplePoisDistDF %>% 
  pivot_longer(!sample_1,
               names_to = "sample_2",
               values_to = "pois_dist")

GGplot Heatmap Poisson Distances

grDevices::png(filename = "htmp_pois_dist.png",  width = 12.5, height = 8, units = 'cm', res = 400)

htmp_pois_dist <- ggplot(samplePoisDistDF, aes(sample_1, sample_2, fill = pois_dist)) +
  geom_tile() +
  scale_fill_gradient(low = "#457b92", high = "#f5ffff") +
  labs(title = "Heatmap: samples poisson distances") +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.title = element_blank(),
    plot.title = element_text(size = 12),
    axis.text.x = element_text(angle = 45, hjust=1)
  )

htmp_pois_dist
dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("htmp_pois_dist.png")

Step C-03: PCA plot

Another way to visualize sample distances is PCA plot. Here the data points are projected on a 2D plane on which one plane explains the most variation while the other plane might explain second most variation

C-03.I:

PCA results

# Save the results of pca as dataframe
PcaDF <- plotPCA(rld.ASE_Matrix, intgroup = c("sampleID", "hapType"), returnData = TRUE)

# Plot pca-plot using DESeq2 package
plotPCA(rld.ASE_Matrix, intgroup = c("sampleID", "hapType"))

GGplot PCA

grDevices::png(filename = "pca.png",  width = 12.5, height = 8, units = 'cm', res = 400)

pca <- ggplot(PcaDF,aes(x = PC1,y = PC2,
                       color = group,
                       shape = group)
) +
  # Make a scatter plot
  geom_point(size = 4) +
  scale_color_manual("Sample:hapType",
                     values = c("#E63946","#457b92", "#E63946","#457b92", "#E63946","#457b92", "#E63946", "#457b92")) +
  scale_shape_manual("Sample:hapType",
                     values = c(19, 19, 17, 17, 15, 15, 3, 3)) +
  labs(
    title = "PCA",
    y = "PC2: 23% variance",
    x = "PC1: 26% variance"
  ) +
  theme_bw() + 
  theme(
    panel.grid.major = element_line(colour = 'grey85', size = 0.2),
    panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10),
    plot.title = element_text(size = 12),
    #legend.position = c(0.8,0.2), #You can adjust legend position here
    #legend.background = element_rect(color = "black")
  )

pca

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("pca.png")

Result: there is more variatio between samples than between haplotypes. But the variation between samples in haplotype group is similar to variation between samples in another haplotype group. The sample-by-Haplotype group forms a very distinct cluster i.e same haplotypes are more similar from different samples.

Step C-04 (**Optional): MDS plot - may be skipped in the pipeline

This is very similar to PCA plot. We use MDS (multidimensional scaling) function R base. This is useful when we don’t have matrix of data of matrix of distances.

C-04.I : MDS using matrix distance

Prepare Dataframe for MDS plot

mds <- as.data.frame(colData(rld.ASE_Matrix))  %>%
  cbind(cmdscale(sampleDistMatrix)) %>%
  mutate(group = paste0(sampleID, ":", hapType)) 

GGplot MSD plot

grDevices::png(filename = "mds.png",  width = 12.5, height = 8, units = 'cm', res = 400)

mds_plot <- ggplot(mds, aes(x = `1`, y = `2`, color = group, shape = group)) +
  geom_point(size = 4) +
  scale_color_manual("Sample:hapType",
                     values = c("#E63946","#457b92", "#E63946","#457b92", "#E63946","#457b92", "#E63946", "#457b92")) +
  scale_shape_manual("Sample:hapType",
                     values = c(19, 19, 17, 17, 15, 15, 3, 3)) +
  coord_fixed() +
  labs(
    title = "MDS"
    #y = "",
    #x = ""
  ) +
  theme_bw() + 
  theme(
    panel.grid.major = element_line(colour = 'grey85', size = 0.2),
    panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10),
    plot.title = element_text(size = 12)
    #legend.position = c(0.8,0.2), #You can adjust legend position here
    #legend.background = element_rect(color = "black")
  )

mds_plot

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("mds.png")

C-04.II : MDS using poission distance

Prepare Dataframe for MDS plot with poisson distances

mdsPois <- as.data.frame(colData(ASE_Matrix)) %>%
  cbind(cmdscale(samplePoisDistMatrix)) %>%
  mutate(group = paste0(sampleID, ":", hapType))

GGplot MSD with Poisson Distances

grDevices::png(filename = "mds_pois.png",  width = 12.5, height = 8, units = 'cm', res = 400)
  
mds_pois <-  ggplot(mdsPois, aes(x = `1`, y = `2`, color = group, shape = group)) +
  geom_point(size = 4) +
  scale_color_manual("Sample:hapType",
                     values = c("#E63946","#457b92", "#E63946","#457b92", "#E63946","#457b92", "#E63946", "#457b92")) +
  scale_shape_manual("Sample:hapType",
                     values = c(19, 19, 17, 17, 15, 15, 3, 3)) +
  coord_fixed() +
  labs(
    title = "MDS: Poisson distances"
    #y = "",
    #x = ""
  ) +
  theme_bw() + 
  theme(
    panel.grid.major = element_line(colour = 'grey85', size = 0.2),
    panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10),
    plot.title = element_text(size = 12),
    #legend.position = c(0.8,0.2), #You can adjust legend position here
    #legend.background = element_rect(color = "black")
  )

mds_pois

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("mds_pois.png")

Step C-05: Check genes with high expression for both haplotypes

Step C-05 I: Prepare data

Calculate unqC_Sum quartiles for each of the samples

quantsDF <- counts.ForBinom %>%
  group_by(sample) %>%
  summarise(q1 = quantile(unqC_Sum, probs = 0.25),
            q2 = quantile(unqC_Sum, probs = 0.50),
            q3 = quantile(unqC_Sum, probs = 0.75))

quantsDF
## # A tibble: 4 × 4
##   sample    q1    q2    q3
##   <chr>  <dbl> <dbl> <dbl>
## 1 2ms01e   146   430 1074 
## 2 2ms02g   152   486 1136 
## 3 2ms03g   142   404 1050 
## 4 2ms04h   134   436 1060.

Left join 3rd Quarile threshold and check if unqC_sum (unqC_My + unqC_Sp) lie in the 4th quartile

countsWithQuants <- counts.ForBinom %>%
  left_join(quantsDF %>% dplyr::select(sample, q3), by = "sample") %>%
  mutate(high_exp = ifelse(unqC_Sum >= q3, 1, 0))

See some summary stats

countsWithQuants %>% group_by(sample) %>%
  summarise(n = sum(high_exp))
## # A tibble: 4 × 2
##   sample     n
##   <chr>  <dbl>
## 1 2ms01e   396
## 2 2ms02g   389
## 3 2ms03g   400
## 4 2ms04h   399

List of unique genes with high expression for each sample

high_exp_01 <- subset(countsWithQuants, sample == "2ms01e" & high_exp == 1)$transcript_ID
high_exp_02 <- subset(countsWithQuants, sample == "2ms02g" & high_exp == 1)$transcript_ID
high_exp_03 <- subset(countsWithQuants, sample == "2ms03g" & high_exp == 1)$transcript_ID
high_exp_04 <- subset(countsWithQuants, sample == "2ms04h" & high_exp == 1)$transcript_ID

#list of lists with genes for each sample
input_high_exp <- list("2ms01e" = high_exp_01, "2ms02g" = high_exp_02, "2ms03g" =high_exp_03, "2ms04h" = high_exp_04)

C-05 II: Venn plot of genes which have overall expression > 3rd quartile

Needed package

library(ggvenn)
## Loading required package: grid

Venn plot

grDevices::png(filename = "venn_high_exp.png",  width = 12.5, height = 8, units = 'cm', res = 400)

venn_high_exp <-ggvenn(
  input_high_exp, 
  fill_color = c("#0073C2FF", "#EFC000FF", "#868686FF", "#CD534CFF"),
  stroke_size = 0.4, set_name_size = 3, text_size = 3
  )

venn_high_exp

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("venn_high_exp.png")

Part D: Binomial Tests

Step D-01: Run binomial test

counts.ForBinom$binom_p = apply(counts.ForBinom[,c("unqC_My", "unqC_Sp")], 
                     1, function(x)binom.test(x[1],x[1]+x[2],p=0.5, alternative = "two.sided")$p.value)

Step D-02: Obtain Results of binomial tests

counts.ForBinom$p0.05 <- ifelse(counts.ForBinom$binom_p < 0.05, 1, 0)
counts.ForBinom$p0.01 <- ifelse(counts.ForBinom$binom_p < 0.01, 1, 0)
counts.ForBinom$p0.001 <- ifelse(counts.ForBinom$binom_p < 0.001, 1, 0)

Part E: ASE analyses (modified DESeq method)

Step E-01: Run the DESeq using the design

dds.ASE_Data <- DESeq(ASE_Matrix, fitType = "local", betaPrior = FALSE)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing

Step E-02: Build the results table

Details: Calling results without any arguments will extract the estimated log2 fold changes and p values for the last variable in the design formula. If there are more than 2 levels for this variable { as is the case in this analysis { results will extract the results table for a comparison of the last level over the first level.

Dataset with results

# Set "Sp" counts as the reference level
result.ASE_Data <- results(
  dds.ASE_Data, contrast = c("hapType", "My", "Sp"), 
  alpha = 0.05)

Number of genes with significant difference (TRUE) and insignificant difference (FALSE)

table(result.ASE_Data$padj < 0.1)
## 
## FALSE  TRUE 
##  1191   570
summary(result.ASE_Data)
## 
## out of 1761 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 267, 15%
## LFC < 0 (down)     : 191, 11%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

Step E-03 (**Optional):

If we want to make the sig genes threshold more stricter we can narrow down padj and/or also use higher L2FC

E-03 I: LFC Threshold == 1 & adjusted p-value < 0.05

Obtain results

result.ASE_Data.05 <- results(
  dds.ASE_Data, contrast = c("hapType", "My", "Sp"),
  alpha = 0.05, lfcThreshold = 1)

Summary of the results

table(result.ASE_Data.05$padj < 0.05)
## 
## FALSE  TRUE 
##  1605    94
summary(result.ASE_Data.05)
## 
## out of 1761 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 1.00 (up)    : 62, 3.5%
## LFC < -1.00 (down) : 32, 1.8%
## outliers [1]       : 0, 0%
## low counts [2]     : 62, 3.5%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

E-03 II: LFC Threshold == 1

(**Optional): If we want the results that have gene expression at least double or halving in My i.e lfc of 1 is 2^1 = 2 (double the expression)

Obtain Results

result.ASE_Data.LFC1 <- results(dds.ASE_Data, lfcThreshold = 1,
                                contrast = c("hapType", "My", "Sp"))

Summary of the results

table(result.ASE_Data.LFC1$padj < 0.05)
## 
## FALSE  TRUE 
##  1670    91
summary(result.ASE_Data.LFC1)
## 
## out of 1761 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 1.00 (up)    : 64, 3.6%
## LFC < -1.00 (down) : 35, 2%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

Step E-04: Convert the results table to dataframe.

Reslts

# Results as dataframe
result.ASE.asDF <- as.data.frame(result.ASE_Data)
result.ASE.LFC1.asDF <- as.data.frame(result.ASE_Data.LFC1)

result.ASE.asDF <- rownames_to_column(
  result.ASE.asDF, var = "transcript_ID")
result.ASE.LFC1.asDF <- rownames_to_column(
  result.ASE.LFC1.asDF , var = "transcript_ID")

head(result.ASE.asDF)
##   transcript_ID baseMean log2FoldChange     lfcSE       stat       pvalue
## 1  AL2G10020.t1  1748.25     -2.2956110 1.2112172 -1.8952925 0.0580536565
## 2  AL2G10030.t1  1089.75      0.6501060 0.1675992  3.8789320 0.0001049161
## 3  AL2G10050.t1   160.25      0.7450848 0.8984919  0.8292616 0.4069564221
## 4  AL2G10060.t1    50.00      0.6378227 0.4900287  1.3016026 0.1930522825
## 5  AL2G10070.t1    23.75     -0.9689420 0.7103124 -1.3641068 0.1725339419
## 6  AL2G10080.t1    47.75      0.1333075 0.3683307  0.3619235 0.7174092155
##           padj
## 1 0.1577661868
## 2 0.0009521844
## 3 0.6059248002
## 4 0.3687256720
## 5 0.3421738043
## 6 0.8530493180
head(result.ASE.LFC1.asDF)
##   transcript_ID baseMean log2FoldChange     lfcSE      stat    pvalue padj
## 1  AL2G10020.t1  1748.25     -2.2956110 1.2112172 -1.069677 0.2847648    1
## 2  AL2G10030.t1  1089.75      0.6501060 0.1675992  0.000000 1.0000000    1
## 3  AL2G10050.t1   160.25      0.7450848 0.8984919  0.000000 1.0000000    1
## 4  AL2G10060.t1    50.00      0.6378227 0.4900287  0.000000 1.0000000    1
## 5  AL2G10070.t1    23.75     -0.9689420 0.7103124  0.000000 1.0000000    1
## 6  AL2G10080.t1    47.75      0.1333075 0.3683307  0.000000 1.0000000    1

Step E-05: Venn plot comparing general ASE results and ASE results with threshold

Here we check how many genes from general ASE results table inter

Prepare data for Venn plot

sign_results <- subset(result.ASE.asDF, padj <= 0.001)$transcript_ID
sign_results_lfc1 <-subset(result.ASE.LFC1.asDF, padj <= 0.001)$transcript_ID

input_ase_results <- list("No threshold" = sign_results, "LFC Threshold = 1" = sign_results_lfc1)

Venn plot

grDevices::png(filename = "venn_ase_sign.png",  width = 12.5, height = 8, units = 'cm', res = 400)

venn_ase_sign <- ggvenn(
  input_ase_results, 
  fill_color = c("#0073C2FF", "#EFC000FF"),
  stroke_size = 0.4, set_name_size = 3, text_size = 3
)

venn_ase_sign

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("venn_ase_sign.png")

Results: basically, the results seem to be obvious. When applying a threshold we simply have lower number of genes with adjusted p-value on the same level.

Part F: Plotting results

Step F-01: Before we start plotting we add some extra information to the results table

This extra step helps to add more metadata information to the “results” for better analyses and plotting

Step F-01.I : update the data/matrix with geneNames, geneID

  • We can use biomart
  • Or a simple column merge (by matching row names) - We use this :)

create geneMap database

geneMap <- counts.ms1e2g3g4h.filt01 %>%
  distinct(start, transcript_ID, .keep_all = TRUE) %>%
  data.frame(row.names = "transcript_ID") %>%
  mutate(
    unqC_My.total = (unqC_My.ms01e + unqC_My.ms02g + unqC_My.ms03g + unqC_My.ms04h),
    unqC_Sp.total = (unqC_Sp.ms01e + unqC_Sp.ms02g + unqC_Sp.ms03g + unqC_Sp.ms04h)) %>%
  select(contig, start, end, gene_Name, gene_ID, unqC_My.total, unqC_Sp.total)
# Note: this geneMap database should have exact same number of rows as "results"
# plus the "rownames" should match between "results" and "geneMap"

head(geneMap)
##              contig start   end gene_Name   gene_ID unqC_My.total unqC_Sp.total
## AL2G10020.t1      2  1116  3595         - AL2G10020          3974         10012
## AL2G10030.t1      2  5226  7090       CP5 AL2G10030          5426          3292
## AL2G10050.t1      2 22602 24361         - AL2G10050           582           700
## AL2G10060.t1      2 29596 30510         - AL2G10060           242           158
## AL2G10070.t1      2 46995 48041       BLT AL2G10070            68           122
## AL2G10080.t1      2 48700 50081         - AL2G10080           188           194

Step F-01.II :now merge this geneMap with resultsData using row.names match.

result.ASE_Data.withGeneMap <- cbind(result.ASE_Data, geneMap)
# Result/Note: this geneMap database should have exact same number of rows as "results"
# plus the "rownames" should match between "results" and "geneMap"

Just check the number of rows

nrow(result.ASE_Data) == nrow(result.ASE_Data.withGeneMap)
## [1] TRUE

Additionally: make a data frame of geneMap and resultsData from S4 object

result.withGeneMap.asDF <- as.data.frame(result.ASE_Data.withGeneMap,
  row.names = rownames(result.ASE_Data.withGeneMap))

result.withGeneMap.asDF <- 
  mutate(result.withGeneMap.asDF, 
         Significance = if_else(padj <= 0.01 & abs(log2FoldChange) >= 4, "|L2FC| > 4 & padj < 0.01", 
                       if_else(padj <= 0.05 & abs(log2FoldChange) >= 2, "|L2FC| > 2 & padj < 0.05", "non-Sig")),
         unqC_total = unqC_My.total + unqC_Sp.total)

Now, we start making plot either using “results” table or updated results table

Step F-02 : Counts plot - using raw counts

Plot of the raw read counts stratified by two haplotypes for specific gene

topGene, PIN1, PIN3, TCP15 - Plot

Step F-02.I :COunt plot - Top gene (gene with the lowers p-value adjusted)

Top gene - DAta

topGene.data <- plotCounts(dds.ASE_Data, gene=which.min(result.ASE_Data.withGeneMap$padj),
                intgroup="hapType", returnData=TRUE)

See which gene it is

result.withGeneMap.asDF %>%
  filter(padj == min(padj))
##              baseMean log2FoldChange     lfcSE     stat       pvalue
## AL2G19580.t1      371       2.599961 0.1739049 14.95048 1.546286e-50
##                     padj contig   start     end gene_Name   gene_ID
## AL2G19580.t1 2.72301e-47      2 7856651 7857947         - AL2G19580
##              unqC_My.total unqC_Sp.total             Significance unqC_total
## AL2G19580.t1          2544           424 |L2FC| > 2 & padj < 0.05       2968

Top gene - plot

grDevices::png(filename = "top_gene.png",  width = 12.5, height = 8, units = 'cm', res = 400)

top_gene <- ggplot(topGene.data, aes(x = hapType, y = count, color = sampleID, group = sampleID)) +
  scale_y_log10() + geom_point(size = 2, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4), size = 1) + 
  labs(title = "Raw counts for the Gene with the least p-vaue", 
       x = "Haplotype", y = "Raw counts") + 
  scale_color_manual("Sample", values = c("#7F58AF", "#64C5EB", "#E84D8A", "#FEB326")) +
  scale_x_discrete(limits = c("Sp", "My")) +
  theme_bw() + 
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=10),
        plot.title = element_text(size = rel(1)),
        panel.grid.major = element_line(colour = 'grey85', size = 0.2),
        panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        #legend.position = c(0.8,0.2), #You can adjust legend position here
        #legend.background = element_rect(color = "black")
  )


top_gene

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("top_gene.png")

Result: this plots the raw counts by Haplotype for the given gene the points are jittered to prevent overlaps the lines are connected between Haplotypes of same samples.

Step F-02.II: Count plot for PIN3

PIN3 - Data

d.PIN3 <- plotCounts(
  dds.ASE_Data, 
  gene=which(result.ASE_Data.withGeneMap$gene_Name == "PIN3"), 
  intgroup="hapType", returnData=TRUE)

PIN3 - Plot

grDevices::png(filename = "pin3.png",  width = 12.5, height = 8, units = 'cm', res = 400)

pin3 <- ggplot(d.PIN3, aes(x = hapType, y = count, color = sampleID, group = sampleID)) +
  scale_y_log10() +  geom_point(size = 2, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4), size = 1) + 
  labs(title = "Raw counts for PIN3", 
       x = "Haplotype", y = "Raw counts") + 
  scale_color_manual("Sample", values = c("#7F58AF", "#64C5EB", "#E84D8A", "#FEB326")) +
  theme_bw() + 
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=10),
        plot.title = element_text(size = rel(1)),
        panel.grid.major = element_line(colour = 'grey85', size = 0.2),
        panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        #legend.position = c(0.8,0.2), #You can adjust legend position here
        #legend.background = element_rect(color = "black")
  )

pin3

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("pin3.png")

PIN3 - p-value

subset(result.withGeneMap.asDF, gene_Name == "PIN3")$padj
## [1] 1.053251e-07

Step F-02.III: Count plot for PIN1

PIN1 - Data

d.PIN1 <- plotCounts(
  dds.ASE_Data, 
  gene=which(result.ASE_Data.withGeneMap$gene_Name == "PIN1"), 
  intgroup="hapType", returnData=TRUE)

PIN1 - Plot

grDevices::png(filename = "pin1.png",  width = 12.5, height = 8, units = 'cm', res = 400)

pin1 <- ggplot(d.PIN1, aes(x = hapType, y = count, color = sampleID, group = sampleID)) +
  scale_y_log10() +  geom_point(size = 2, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4), size = 1) + 
  labs(title = "Raw counts for PIN1", 
       x = "Haplotype", y = "Raw counts") + 
  scale_color_manual("Sample", values = c("#7F58AF", "#64C5EB", "#E84D8A", "#FEB326")) +
  theme_bw() + 
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=10),
        plot.title = element_text(size = rel(1)),
        panel.grid.major = element_line(colour = 'grey85', size = 0.2),
        panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        #legend.position = c(0.8,0.2), #You can adjust legend position here
        #legend.background = element_rect(color = "black")
  )

pin1

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("pin1.png")

PIN1 - p-value

subset(result.withGeneMap.asDF, gene_Name == "PIN1")$padj
## [1] 0.0809853

Step F-02.III: Count plot for TCP15

TCP15 - Data

d.TCP15 <- plotCounts(
  dds.ASE_Data, 
  gene=which(result.ASE_Data.withGeneMap$gene_ID == "AL2G28860"), 
  intgroup="hapType", returnData=TRUE)

TCP15 - Plot

grDevices::png(filename = "tcp15.png",  width = 12.5, height = 8, units = 'cm', res = 400)

tcp15 <- ggplot(d.TCP15, aes(x = hapType, y = count, color = sampleID, group = sampleID)) +
  scale_y_log10() +  geom_point(size = 2, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4), size = 1) + 
  labs(title = "Raw counts for TCP15", 
       x = "Haplotype", y = "Raw counts") + 
  scale_color_manual("Sample", values = c("#7F58AF", "#64C5EB", "#E84D8A", "#FEB326")) +
  scale_x_discrete(limits = c("Sp", "My")) +
  theme_bw() + 
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=10),
        plot.title = element_text(size = rel(1)),
        panel.grid.major = element_line(colour = 'grey85', size = 0.2),
        panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        #legend.position = c(0.8,0.2), #You can adjust legend position here
        #legend.background = element_rect(color = "black")
  )

tcp15

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("tcp15.png")

TCP15 - p-value

subset(result.withGeneMap.asDF, gene_ID == "AL2G28860")$padj
## [1] 0.9814344

F-02 IV: Count plot for TCP22

TCP22 - Data

d.TCP22 <- plotCounts(
  dds.ASE_Data, 
  gene=which(result.ASE_Data.withGeneMap$gene_ID == "AL2G31640"), 
  intgroup="hapType", returnData=TRUE)

TCP22 - Plot

grDevices::png(filename = "tcp22.png",  width = 12.5, height = 8, units = 'cm', res = 400)

tcp22 <- ggplot(d.TCP22, aes(x = hapType, y = count, color = sampleID, group = sampleID)) +
  scale_y_log10() +  geom_point(size = 2, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4), size = 1) + 
  labs(title = "Raw counts for TCP22", 
       x = "Haplotype", y = "Raw counts") + 
  scale_color_manual("Sample", values = c("#7F58AF", "#64C5EB", "#E84D8A", "#FEB326")) +
  scale_x_discrete(limits = c("Sp", "My")) +
  theme_bw() + 
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=10),
        plot.title = element_text(size = rel(1)),
        panel.grid.major = element_line(colour = 'grey85', size = 0.2),
        panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        #legend.position = c(0.8,0.2), #You can adjust legend position here
        #legend.background = element_rect(color = "black")
  )

tcp22

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("tcp22.png")

TCP22 - p-value

subset(result.withGeneMap.asDF, gene_ID == "AL2G31640")$padj
## [1] 0.8161894

F-02 V: Count plot for AP1

AP1 - Data

d.AP1 <- plotCounts(
  dds.ASE_Data, 
  gene=which(result.ASE_Data.withGeneMap$gene_ID == "AL2G28180"), 
  intgroup="hapType", returnData=TRUE)

AP1 - Plot

grDevices::png(filename = "ap1.png",  width = 12.5, height = 8, units = 'cm', res = 400)

ap1 <- ggplot(d.AP1, aes(x = hapType, y = count, color = sampleID, group = sampleID)) +
  scale_y_log10() +  geom_point(size = 2, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4), size = 1) + 
  labs(title = "Raw counts for AP1", 
       x = "Haplotype", y = "Raw counts") + 
  scale_color_manual("Sample", values = c("#7F58AF", "#64C5EB", "#E84D8A", "#FEB326")) +
  scale_x_discrete(limits = c("Sp", "My")) +
  theme_bw() + 
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=10),
        plot.title = element_text(size = rel(1)),
        panel.grid.major = element_line(colour = 'grey85', size = 0.2),
        panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        #legend.position = c(0.8,0.2), #You can adjust legend position here
        #legend.background = element_rect(color = "black")
  )

ap1

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("ap1.png")

AP1 - p-value

subset(result.withGeneMap.asDF, gene_ID == "AL2G28180")$padj
## [1] 0.9107402

Step F-03: Prepare Data for Diagnostic Plots

Step F-03 I. Make significance categories

Here we make categories of genes depending on the Log2FoldChange and p-value adjusted.

The attitude is as follows:

  • “Lower” - when Log2FoldChange < 0.6 and padj <= 0.05. It means that Sp allele is expressed more than My and the difference is significant.
  • “Upper” - when Log2FoldChange > 0.6 and padj <= 0.05. It means that My allele is expressed more than Sp and the difference is significant.
  • “NS” - means that the difference between allele expression is insignificant.

We take < 0.6 and > 0.6 for Log2FoldChange according to this link: https://biocorecrg.github.io/CRG_RIntroduction/volcano-plots.html

In addition, we make separate groups for significance which derived purely according to p-values.

# Make a copy of the DESeq2 results table
result.ASE.copy <- result.ASE.asDF

# -Log10 of p-value adjusted
result.ASE.copy$padj_log10 <- -log10(result.ASE.copy$padj)

# Make groups of genes depending on the 
result.ASE.copy <- result.ASE.copy %>%
  mutate(
    cat = case_when(
    log2FoldChange < -0.6 & padj <= 0.05 ~ "Lower",
    log2FoldChange > 0.6 & padj <= 0.05 ~ "Upper",
    TRUE ~ "NS"),
    sign = case_when(
      padj <= 0.001 ~ "< 0.001",
      padj <= 0.01 ~ "< 0.01",
      padj <= 0.05 ~ "< 0.05",
      padj <= 0.1 ~ "< 0.1",
      TRUE ~ "ns")
  )

F-03 II: A brief look on significance categories

Check number number of genes in each category

# Check categories
knitr::kable(table(result.ASE.copy$cat))
Var1 Freq
Lower 162
NS 1377
Upper 222

Check number of genes by significance level

We interpret the table as follows:

category p-value interval
< 0.001 p-value <= 0.001
< 0.01 0.001 < p-value <= 0.01
< 0.05 0.01 < p-value <= 0.05
< 0.01 0.05 < p-value <= 0.1
ns p-value > 0.1
knitr::kable(table(result.ASE.copy$sign))
Var1 Freq
< 0.001 196
< 0.01 90
< 0.05 172
< 0.1 112
ns 1191

Plot the number of genes by significance levels

result.ASE.copy %>%
  group_by(sign) %>%
  summarise(n = n()) %>%
  ggplot(aes(x = sign, y = n, fill = sign)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = n), vjust = 2) +
  labs(x = "Significance level",
       y = "Count") +
  scale_fill_manual("Sign. level",
                    values = c("#457b92", "#6f9baa", "#9abcc4", "#c7dde0", "#9e9e9e")) +
  theme_bw() + 
  theme(
    panel.grid.major = element_line(colour = 'grey85', size = 0.2),
    panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    plot.title = element_text(size = 16),
    axis.text.x = element_text(angle = 45, hjust=1),
    #legend.position = c(0.8,0.2), #You can adjust legend position here
    #legend.background = element_rect(color = "black")
  )

Step F-04: Diagnostic Plots

F-04 I: Volcano plot

Make a volcano plot

grDevices::png(filename = "volc_wald.png",  width = 12.5, height = 8, units = 'cm', res = 400)

# Volcano Plot
volc_wald <- result.ASE.copy %>%
  ggplot(mapping = aes(x = log2FoldChange, y = padj_log10)) +
  geom_point(data = result.ASE.copy %>% filter(cat == "Lower"), aes(color = sign), alpha = 0.9) +
  scale_color_manual("My < Sp", #legend title
                     values = c("#457b92","#7eb1c7","#b8eaff"), #colors for sign_level
                     guide = guide_legend(override.aes = list(size = 4)) #size of the points inside legend
  ) +
  ggnewscale::new_scale_color() + # Need to add new gradient scale
  # Beyond abline
  geom_point(data = result.ASE.copy %>% filter(cat == "Upper"), aes(color = sign), alpha = 0.9) +
  scale_color_manual("My > Sp", 
                     values = c("#E63946","#f98e89","#ffd6d2"),
                     guide = guide_legend(override.aes = list(size = 4))
  ) +
  ggnewscale::new_scale_color() +
  # Not significant
  geom_point(data = result.ASE.copy %>% filter(cat == "NS"), aes(color = "Not sign."), size = 1, alpha = 0.8) +
  scale_color_manual("",values= "#525252", labels = "Not sign.",
                     guide = guide_legend(override.aes = list(size = 4))
  ) +
  #ggnewscale::new_scale_color() +
  labs(x = "Log2 Folds Change",
       y = "-log10 p-value",
       title = "Volcano plot") +
  theme_bw() + 
  theme(
    panel.grid.major = element_line(colour = 'grey85', size = 0.2),
    panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10),
    plot.title = element_text(size = 12),
    #legend.key = element_rect(color = "black")
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 10),
    legend.spacing.y = unit(0.05, "cm")
  )

volc_wald

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("volc_wald.png")

F-04 II: MA-plot (ggpubr version)

Upload the package

library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
## 
##     mutate

Make MA-plot with ggpubr package

grDevices::png(filename = "ma_pubr.png",  width = 12.5, height = 8, units = 'cm', res = 400)

ma_pubr <- ggpubr::ggmaplot(result.ASE_Data.withGeneMap, fdr = 0.05, fc = 1.5, size = 0.4,
                 main = "MA plot", legend = "top", top = 20, 
                 palette = c("#B31B21", "#1465AC", "darkgray"),
                 genenames = as.vector(if_else(result.ASE_Data.withGeneMap$padj < 0.001, result.ASE_Data.withGeneMap$gene_Name, NULL)), 
                 font.label = c("bold", 6), font.legend = c("bold", 8), font.main = c("bold", 8),
                 font.x = 8, font.y = 8,
                 ggtheme = ggplot2::theme_bw())

ma_pubr

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("ma_pubr.png")

F-04 III: MA-plot (ggplot2 version)

grDevices::png(filename = "md_wald.png",  width = 12.5, height = 8, units = 'cm', res = 400)

md_wald <- result.ASE.copy %>%
  ggplot(mapping = aes(y = log2FoldChange, x = baseMean)) +
  geom_point(data = result.ASE.copy %>% filter(cat == "Lower"), aes(color = sign), alpha = 0.9) +
  scale_color_manual("My < Sp", #legend title
                     values = c("#457b92","#7eb1c7","#b8eaff"), #colors for sign_level
                     guide = guide_legend(override.aes = list(size = 4)) #size of the points inside legend
  ) +
  ggnewscale::new_scale_color() + # Need to add new gradient scale
  # Beyond abline
  geom_point(data = result.ASE.copy %>% filter(cat == "Upper"), aes(color = sign), alpha = 0.9) +
  scale_color_manual("My > Sp", 
                     values = c("#E63946","#f98e89","#ffd6d2"),
                     guide = guide_legend(override.aes = list(size = 4))
  ) +
  ggnewscale::new_scale_color() +
  # Not significant
  geom_point(data = result.ASE.copy %>% filter(cat == "NS"), aes(color = "Not sign."), size = 1, alpha = 0.8) +
  scale_color_manual("",values= "#525252", labels = "Not sign.",
                     guide = guide_legend(override.aes = list(size = 4))
  ) +
  #ggnewscale::new_scale_color() +
  labs(y = "Log2 Folds Change",
       x = "Log2 mean expression",
       title = "MA Plot") +
  scale_x_continuous(trans = "log2") +
  theme_bw() + 
  theme(
    panel.grid.major = element_line(colour = 'grey85', size = 0.2),
    panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10),
    plot.title = element_text(size = 12),
    #legend.key = element_rect(color = "black")
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 10),
    legend.spacing.y = unit(0.05, "cm")
  )

md_wald

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("md_wald.png")

F-04 IV: Additional Plots (not included into the final text)

Plot dispersion estimates

plotDispEsts(dds.ASE_Data, ylim = c(1e-6, 1e1))

Histogram of p-values

hist( result.ASE_Data.withGeneMap$pvalue, 
      breaks=0:20/20, col="grey50", border = "white", 
      main = "Histogram of p-values", xlab = "p-value")

Allelic Expression Balance plot

This plot gene expression (log2FoldChanges i.e ASE) by chromosome and genomic position. Then color the points where the p-adj are significant.

ggplot(result.withGeneMap.asDF, aes(x=start, y= log2FoldChange)) +
  labs(title = "Allele Expression Balance Plot: My vs. Sp", x = "Genome Position",
       y = "log2FoldChange: \nMy haplotype vs. Sp haplotype") + 
    theme_bw() +
  theme(plot.title = element_text(hjust = 0.5), legend.position = "bottom") +
  geom_hline(yintercept = c(-4, -2, 2, 4) , 
             color = c("darkgreen", "darkred", "darkred", "darkgreen")) +
  geom_point(aes(x=start, y= log2FoldChange, 
                 color = Significance, shape = Significance, 
                 size = padj), 
             position=position_jitter(w=0.1,h=0)) +
  scale_shape_manual(values=c(3, 4, 5)) #+

  #scale_color_manual(values = c("green", "red", "black"))

log(XY) plot

This helps us to show how the log2(unique counts) changes for My haplotype vs. Sp haplotype.

ggplot(result.withGeneMap.asDF, aes(x=log2(unqC_Sp.total+1) , y= log2(unqC_My.total+1))) +
  labs(title = "log2 X-Y plot", x = "log2(# of unique Sp haplotype reads + 1)",
       y = "log2(# of unique My haplotype reads + 1)") + 
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5), legend.position = "bottom") +
  geom_abline(slope =  1, intercept =  0, color = "darkred")+
  geom_point(aes(x=log2(unqC_Sp.total+1) , y= log2(unqC_My.total+1), 
                 color = Significance, shape = Significance, 
                 size = padj),
             position=position_jitter(w=0.1,h=0)) +
  scale_shape_manual(values=c(3, 4, 5)) #+

  #scale_color_manual(name = "legend of significance")

Manhattan plot

ggplot(result.withGeneMap.asDF, aes(x = start, y = -log10(padj))) +
  geom_point(aes(x = start, y = -log10(padj)))

Step F-05: Prepare data for boxplots

F-05 I: Data for boxplot with top-20 genes by the lowes p-value

Additional manipulations with original data

counts_2ms01e.joinTest <- counts_2ms01e %>%
  distinct(transcript_ID, .keep_all = TRUE)

counts_2ms02g.joinTest <- counts_2ms02g %>%
  distinct(transcript_ID, .keep_all = TRUE)

counts_2ms03g.joinTest <- counts_2ms03g %>%
  distinct(transcript_ID, .keep_all = TRUE)

counts_2ms04h.joinTest <- counts_2ms04h %>%
  distinct(transcript_ID, .keep_all = TRUE)

Get the data with all sample combined together (no filters applied)

# Union samples verically
data_all <- rbind(counts_2ms01e.joinTest,
                  counts_2ms02g.joinTest,
                  counts_2ms03g.joinTest,
                  counts_2ms04h.joinTest)

# Get the variable which represent the QTL region
data_all <- data_all %>% 
  mutate(unqC_Sum = unqC_My + unqC_Sp,
         qtl_pos = ifelse(start >= 12875963 & start <= 16344528, 1, 0))

Get the maximum allele expression (between both Sp and My) for each gene

This data will than be used for labelling genes.

max_exp <- data_all %>%
  select(transcript_ID, gene_ID, unqC_My, unqC_Sp) %>%
  pivot_longer(c(unqC_My, unqC_Sp), names_to = "allele", values_to = "exp") %>%
  group_by(transcript_ID) %>%
  summarise(max_exp = max(exp))

Obtain top-20 genes by adjusted Wald-test p-value

genes_20 <- result.withGeneMap.asDF %>%
  arrange(padj) %>%
  select(gene_ID, start, padj) %>%
  tibble::rownames_to_column("transcript_ID") %>%
  head(20) %>%
  left_join(max_exp, by = "transcript_ID") %>%
  mutate(max_log = log2(max_exp + 1) + 1,
         qtl_pos = ifelse(start >= 12875963 & start <= 16344528, 1, 0)) %>% 
  arrange(start)

Prepare data for boxplot with top-20 genes

data_bx_20 <- data_all %>% 
  filter(gene_ID %in% genes_20$gene_ID) %>%
  select(transcript_ID, gene_ID, start, unqC_My, unqC_Sp) %>%
  pivot_longer(c(unqC_My, unqC_Sp), names_to = "allele", values_to = "exp") %>%
  mutate(log_exp = log2(exp + 1), 
         qtl_pos = ifelse(start >= 12875963 & start <= 16344528, 1, 0))

genes_20_table <- data_all %>%
  filter(gene_ID %in% genes_20$gene_ID) %>%
  select(transcript_ID, unqC_My, unqC_Sp) %>%
  group_by(transcript_ID) %>%
  summarise(mean_My = mean(unqC_My),
            sd_My = sd(unqC_My), 
            mean_Sp = mean(unqC_Sp),
            sd_Sp = sd(unqC_Sp)) %>%
  left_join(result.withGeneMap.asDF %>% 
              tibble::rownames_to_column("transcript_ID") %>%
              select(transcript_ID, gene_ID, padj, start),
            by = "transcript_ID") %>%
  mutate(qtl_pos = ifelse(start >= 12875963 & start <= 16344528, 1, 0))

F-05 II: Data for boxplot for Big Boxplot №1 (genes with p-value < 0.001)

In the part F-05 II and following parts F-05 III and F-05 IV we build “Big Boxplots”. These boxplots contain all the genes which follow a specific condidition of significance for difference in allele expression.

This critereas: * Big Boxplot №1 - all the genes which have Wald-test p-value < 0.001 * Big Boxplot №2 - all the genes which are in category: |L2FC| > 2 & padj < 0.05 * Big Boxplot №3 - all the genes which are in category: |L2FC| > 4 & padj < 0.01

Colors for coloring some separate boxplots

colors <- c("#00A5E3", "#8DD7BF", "#FF96C5", "#FF5768", "#FFBF65", "#00B0BA")

Get the data for Big Boxplot №1 (genes with p-value < 0.001)

genes_1 <- result.withGeneMap.asDF %>%
  filter(padj <= 0.001) %>%
  select(gene_ID, start, padj) %>%
  tibble::rownames_to_column("transcript_ID") %>%
  left_join(max_exp, by = "transcript_ID") %>%
  mutate(max_log = log2(max_exp + 1) + 1,
         qtl_pos = ifelse(start >= 12875963 & start <= 16344528, 1, 0)) %>% 
  arrange(start)

data_bigbx1 <- data_all %>% 
  filter(transcript_ID %in% genes_1$transcript_ID) %>%
  select(transcript_ID, gene_ID, start, unqC_My, unqC_Sp, unqC_Sum) %>%
  #pivot_longer(c(unqC_My, unqC_Sp), names_to = "allele", values_to = "exp") %>%
  mutate(
    #log_exp = log2(exp + 1), 
    qtl_pos = ifelse(start >= 12875963 & start <= 16344528, 1, 0))

Get the data for boxplots with high maximum expression (withing both Sp and My)

Threshold in this case was chosen visually to keep only five boxplots filled with color for each of the plots.The same approach works for the other two big boxplots.

gene_names1 <- data_bigbx1 %>%
  group_by(start, gene_ID) %>%
  dplyr::summarise(max = max(unqC_Sum)) %>%
  filter(max > 9000)
## `summarise()` has grouped output by 'start'. You can override using the
## `.groups` argument.
high_boxplots1 <- data_bigbx1 %>%
  group_by(start, gene_ID) %>%
  filter(max(unqC_Sum) > 9000)

F-05 III: Data for boxplot for Big Boxplot №2 (category = |L2FC| > 2 & padj < 0.05)

Prepare data for the boxplot

genes_2 <- result.withGeneMap.asDF %>%
  filter(Significance == "|L2FC| > 2 & padj < 0.05") %>%
  select(gene_ID, start, padj) %>%
  tibble::rownames_to_column("transcript_ID") %>%
  left_join(max_exp, by = "transcript_ID") %>%
  mutate(max_log = log2(max_exp + 1) + 1,
         qtl_pos = ifelse(start >= 12875963 & start <= 16344528, 1, 0)) %>% 
  arrange(start)

data_bigbx2 <- data_all %>% 
  filter(transcript_ID %in% genes_2$transcript_ID) %>%
  select(transcript_ID, gene_ID, start, unqC_My, unqC_Sp, unqC_Sum) %>%
  #pivot_longer(c(unqC_My, unqC_Sp), names_to = "allele", values_to = "exp") %>%
  mutate(
    #log_exp = log2(exp + 1), 
    qtl_pos = ifelse(start >= 12875963 & start <= 16344528, 1, 0))

Prepare data for the colored boxplots

gene_names2 <- data_bigbx2 %>%
  group_by(start, gene_ID) %>%
  dplyr::summarise(max = max(unqC_Sum)) %>%
  filter(max > 3700)
## `summarise()` has grouped output by 'start'. You can override using the
## `.groups` argument.
high_boxplots2 <- data_bigbx2 %>%
  group_by(start, gene_ID) %>%
  filter(max(unqC_Sum) > 3700)

F-05 IV: Data for boxplot for Big Boxplot №3 (category = |L2FC| > 4 & padj < 0.01)

Prepare Data for the boxplot

genes_3 <- result.withGeneMap.asDF %>%
  filter(Significance == "|L2FC| > 4 & padj < 0.01") %>%
  select(gene_ID, start, padj) %>%
  tibble::rownames_to_column("transcript_ID") %>%
  left_join(max_exp, by = "transcript_ID") %>%
  mutate(max_log = log2(max_exp + 1) + 1,
         qtl_pos = ifelse(start >= 12875963 & start <= 16344528, 1, 0)) %>% 
  arrange(start)

data_bigbx3 <- data_all %>% 
  filter(transcript_ID %in% genes_3$transcript_ID) %>%
  select(transcript_ID, gene_ID, start, unqC_My, unqC_Sp, unqC_Sum) %>%
  #pivot_longer(c(unqC_My, unqC_Sp), names_to = "allele", values_to = "exp") %>%
  mutate(
    #log_exp = log2(exp + 1), 
    qtl_pos = ifelse(start >= 12875963 & start <= 16344528, 1, 0))

Prepare data for the colored boxplots

gene_names3 <- data_bigbx3 %>%
  group_by(start, gene_ID) %>%
  dplyr::summarise(max = max(unqC_Sum)) %>%
  filter(max > 2100)
## `summarise()` has grouped output by 'start'. You can override using the
## `.groups` argument.
high_boxplots3 <- data_bigbx3 %>%
  group_by(start, gene_ID) %>%
  filter(max(unqC_Sum) > 2100)

Step F-06: Plot Boxplots

F-06 I: Boxplot - top 20 genes

grDevices::png(filename = "bx_20_v2.png",  width = 16.5, height = 10, units = 'cm', res = 400)

bx_20_v2 <- data_bx_20 %>%
  ggplot(mapping = aes(x = as.factor(start), y = log_exp, fill = as.factor(qtl_pos))) +
  geom_boxplot(alpha = 0.3) +
  geom_point(aes(color = allele),
             #position = position_jitter(),
             size = 1.3) +
  stat_summary(fun.y=mean, geom="point", shape=20, size=4, color="#FFA500", fill="#FFA500") +
  scale_color_manual("Haplotype",
                     values = c("#E63946", "#457b92"),
                     labels = c("My", "Sp")) +
  scale_fill_manual("QTL region", values = c("White", "#00A86B"),
                    labels = c("out", "in")) +
  scale_x_discrete(labels = genes_20$gene_ID) +
  labs(
    #title = "Top 20 genes with highest gene expression",
    y = "Log2(Expression + 1)",
    x = "Gene ID"
  ) +
  theme_bw() + 
  theme(
    panel.grid.major = element_line(colour = 'grey85', size = 0.2),
    panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10),
    plot.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust=1),
    #legend.position = c(0.8,0.2), #You can adjust legend position here
    legend.background = element_rect(color = "black"),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8)
  )
## Warning: `fun.y` is deprecated. Use `fun` instead.
bx_20_v2

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("bx_20_v2.png")

F-06 II: Big Boxplot №1 (genes with p-value < 0.001)

grDevices::png(filename = "bx_big1.png",  width = 16.5, height = 10, units = 'cm', res = 400)

bx_big1 <- data_bigbx1 %>%
  ggplot(mapping = aes(x = as.factor(start), y = unqC_Sum)) +
  geom_boxplot() +
  geom_boxplot(data = high_boxplots1, aes(x = as.factor(start), y = unqC_Sum, fill = gene_ID, color = gene_ID),
              show.legend = FALSE) +
  geom_point(data = data_bigbx1 %>% filter(qtl_pos == 1), aes(x = as.factor(start), y = 17500), shape = 15,
             size = 2.5, color = "#00A86B", fill = "#00A86B") +
  annotate("label", label = "QTL region", y = 18500, x = "14891953", size = 3) +
  #geom_rect(aes(xmin= as.factor("12875963"), xmax= as.factor("16344528"), ymin=0, ymax=Inf), alpha = 0.5) +
  #geom_segment(aes(x = "12875963", y = 0, xend = "12875963", yend = 17500)) +
  #scale_x_discrete(labels = levels(factor(data_bigbx1$gene_ID))) +
  scale_x_discrete(labels = levels(factor(data_bigbx1$gene_ID)),
                   guide = guide_axis(check.overlap = TRUE)
                   ) +
  scale_fill_manual(values = colors) +
  scale_color_manual(values = colors) +
  ggrepel::geom_label_repel(data = gene_names1, aes(x = as.factor(start), y = max, label = gene_ID, fill = gene_ID),
                            hjust = -0.2,
                            show.legend = FALSE) +
  labs(y = "Total Gene expression") +
  theme_bw() + 
  theme(
    #panel.grid.major = element_line(colour = 'grey85', size = 0.2),
    #panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10),
    #axis.title.y = element_blank(),
    plot.title = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust=1),
    #legend.position = c(0.8,0.2), #You can adjust legend position here
    legend.background = element_rect(color = "black")
  )

bx_big1

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("bx_big1.png")

F-06 III: Big Boxplot №2 (category = |L2FC| > 2 & padj < 0.05)

grDevices::png(filename = "bx_big2.png",  width = 16.5, height = 10, units = 'cm', res = 400)

bx_big2 <- data_bigbx2 %>%
  ggplot(mapping = aes(x = as.factor(start), y = unqC_Sum)) +
  geom_boxplot() +
  geom_boxplot(data = high_boxplots2, aes(x = as.factor(start), y = unqC_Sum, fill = gene_ID, color = gene_ID),
               show.legend = FALSE) +
  geom_point(data = data_bigbx2 %>% filter(qtl_pos == 1), aes(x = as.factor(start), y = 17500), shape = 15,
             size = 2.5, color = "#00A86B", fill = "#00A86B") +
  annotate("label", label = "QTL region", y = 18500, x = "14795486", size = 3) +
  #geom_rect(aes(xmin= as.factor("12875963"), xmax= as.factor("16344528"), ymin=0, ymax=Inf), alpha = 0.5) +
  #geom_segment(aes(x = "12875963", y = 0, xend = "12875963", yend = 17500)) +
  #scale_x_discrete(labels = levels(factor(data_bigbx1$gene_ID))) +
  scale_x_discrete(labels = levels(factor(data_bigbx2$gene_ID)),
                   guide = guide_axis(check.overlap = TRUE)
  ) +
  scale_fill_manual(values = colors) +
  scale_color_manual(values = colors) +
  ggrepel::geom_label_repel(data = gene_names2, aes(x = as.factor(start), y = max, label = gene_ID, fill = gene_ID),
                            hjust = -0.2,
                            show.legend = FALSE) +
  labs(y = "Total Gene expression") +
  theme_bw() + 
  theme(
    #panel.grid.major = element_line(colour = 'grey85', size = 0.2),
    #panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10),
    #axis.title.y = element_blank(),
    plot.title = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust=1),
    #legend.position = c(0.8,0.2), #You can adjust legend position here
    legend.background = element_rect(color = "black")
  )

bx_big2

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("bx_big2.png")

F-06 III: Big Boxplot №2 (category = |L2FC| > 4 & padj < 0.01)

grDevices::png(filename = "bx_big3.png",  width = 16.5, height = 10, units = 'cm', res = 400)

bx_big3 <- data_bigbx3 %>%
  ggplot(mapping = aes(x = as.factor(start), y = unqC_Sum)) +
  geom_boxplot() +
  geom_boxplot(data = high_boxplots3, aes(x = as.factor(start), y = unqC_Sum, fill = gene_ID, color = gene_ID),
               show.legend = FALSE) +
  geom_point(data = data_bigbx3 %>% filter(qtl_pos == 1), aes(x = as.factor(start), y = 10000), shape = 15,
             size = 2.5, color = "#00A86B", fill = "#00A86B") +
  annotate("label", label = "QTL region", y = 11000, x = "14635146", size = 3) +
  #geom_rect(aes(xmin= as.factor("12875963"), xmax= as.factor("16344528"), ymin=0, ymax=Inf), alpha = 0.5) +
  #geom_segment(aes(x = "12875963", y = 0, xend = "12875963", yend = 17500)) +
  #scale_x_discrete(labels = levels(factor(data_bigbx1$gene_ID))) +
  scale_x_discrete(labels = levels(factor(data_bigbx3$gene_ID)),
                   guide = guide_axis(check.overlap = TRUE)
  ) +
  scale_fill_manual(values = colors) +
  scale_color_manual(values = colors) +
  ggrepel::geom_label_repel(data = gene_names3, aes(x = as.factor(start), y = max, label = gene_ID, fill = gene_ID),
                            hjust = -0.2,
                            show.legend = FALSE) +
  labs(y = "Total Gene expression") +
  theme_bw() + 
  theme(
    #panel.grid.major = element_line(colour = 'grey85', size = 0.2),
    #panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10),
    #axis.title.y = element_blank(),
    plot.title = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust=1),
    #legend.position = c(0.8,0.2), #You can adjust legend position here
    legend.background = element_rect(color = "black")
  )

bx_big3

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("bx_big3.png")

Part G: Test QTL region

Step G-01: Compare QTL region with 3 other regions

G-01 I: Make a column for regions

result.withRegions <- result.withGeneMap.asDF %>%
  mutate(region = case_when(
    start <  12875963/2 ~ "1",
    start <  12875963 ~ "2",
    start <  16344528 ~ "QTL",
    TRUE ~ "3"))

G-01 II: Compare results by significance groups (from “Upper”/“Lower”/“Non-sig” categoies)

We treat gene as having significant difference if it is not from “Non-sig” category

Variables: * num_sig = 1 if gene expression difference is significant, 0 - otherwise * n - number of genes in the region * mean total - average gene expression (mean unqC_total) * sum_num_sig - number of significant genes in the region * perc_sig - percent of significant genes from the overall number of genes in the region

resultsRegionsCat <- result.withRegions %>% 
  mutate(num_sig = ifelse(Significance == "non-Sig",0,1)) %>%
  group_by(region) %>%
  summarise(n = n(),
            mean_total = mean(unqC_total),
            sum_num_sig = sum(num_sig)) %>%
    mutate(perc_sig = sum_num_sig/n)

resultsRegionsCat
## # A tibble: 4 × 5
##   region     n mean_total sum_num_sig perc_sig
##   <chr>  <int>      <dbl>       <dbl>    <dbl>
## 1 1        449      4079.          51   0.114 
## 2 2        276      3682.          36   0.130 
## 3 3        516      3693.          33   0.0640
## 4 QTL      520      3912.          47   0.0904

G-01 III: Compare results by significance groups (depend only on adjusted p-value)

We treat gene as having significant difference if its adjusted p-value < 0.001

resultsRegionsPadj <- result.withRegions %>% 
  mutate(num_sig = ifelse(padj < 0.001 ,1,0)) %>%
  group_by(region) %>%
  summarise(n = n(),
            mean_total = mean(unqC_total),
            sum_num_sig = sum(num_sig)) %>%
  mutate(perc_sig = sum_num_sig/n)

resultsRegionsPadj
## # A tibble: 4 × 5
##   region     n mean_total sum_num_sig perc_sig
##   <chr>  <int>      <dbl>       <dbl>    <dbl>
## 1 1        449      4079.          56    0.125
## 2 2        276      3682.          31    0.112
## 3 3        516      3693.          53    0.103
## 4 QTL      520      3912.          56    0.108

G-01 IV: Run Chi-squared tests

Tests compare whether proportions in sample differ.

All the analysis checks the proportions of significant genes obtained by adjusted p-value (< 0.001)

check_prop <- resultsRegionsPadj$perc_sig[4] #Exected proportion

chisq.test(c(56, 449-56), p = c(check_prop, 1-check_prop))$p.value #Region 1
## [1] 0.2444053
chisq.test(c(31, 276-31), p = c(check_prop, 1-check_prop))$p.value #Region 2
## [1] 0.8041747
chisq.test(c(53, 516-53), p = c(check_prop, 1-check_prop))$p.value #Region 3
## [1] 0.7152144

There is no case when Chisq test p-value is less than 0.05. Hence, we cannot reject the null hypothesis about the 0 difference.

Step G-02: Compare QTL region with random samples of equal size

G-02 I: Get the data outside of the QTL region as a separate dataframe

resultsOutQTL <- result.withRegions %>% filter(region != "QTL") %>%
  mutate(num_sig = ifelse(padj < 0.001, 1, 0))

G-02 II: Run Chisq test for 10000 random samples

Random samples from this part do not take into account actual genomic positions. For each iteration of the loop random genes are taken to form a dataset. Then the proportion if significant genes is calculated and checked whether it significantly differs from the expected proportions (which is an actual proportion which we obtained for the QTL region).

Then we calculate a share of samples with significantly different proportions of significant genes.

Run the loop

# Prepare dataframe to take loop records.
df_test<- as.data.frame(matrix(nrow = 10000, ncol = 3))
df_test <- df_test %>% 
  dplyr::rename(num_sig = V1,
         prop = V2, 
         p.value = V3)

set.seed(256)

for (i in 1:10000) {
  df_i <- resultsOutQTL[sample(nrow(resultsOutQTL), 520), ]
  n_sig <- sum(df_i$num_sig == 1)
  n_unsig <- sum(df_i$num_sig == 0)
  df_test$num_sig[i] <- n_sig
  df_test$prop[i] <- n_sig/nrow(df_i)
  df_test$p.value[i] <- chisq.test(c(n_sig, n_unsig), p = c(check_prop, 1 - check_prop))$p.value
}

Get the share of sample where proportion of significant genes differs from the QTL region

sum(df_test$p.value < 0.05)/10000
## [1] 0.0253

The result is less than 5%, so we can conclude that proportion of significant genes does really differ from the latter dataset.

Check regions with significant difference in proportions whether this proportion was larger or smaller

1 - proportion of significant genes in QTL region is larger than in random sample 0 - otherwise

df_test_sig <- df_test %>% filter(p.value < 0.05) %>%
  mutate(compare = ifelse(prop < check_prop, 1, 0))

df_test_sig %>% group_by(compare) %>%
  summarise(n =n())
## # A tibble: 2 × 2
##   compare     n
##     <dbl> <int>
## 1       0   236
## 2       1    17

Result: we can see that even if there was a signigicant difference in the proportion of significant genes, in most of the cases this proportion was larger than in QTL region.

G-02 III: Histogram of obtained proportions

grDevices::png(filename = "chi_hist.png",  width = 12.5, height = 8, units = 'cm', res = 400)

chi_hist <- df_test %>%
  ggplot(aes(x = prop)) +
  geom_histogram(bins = 15, fill = "#58508d", alpha = 0.9) +
  geom_vline(xintercept = check_prop, color = "red", size = 1) +
  scale_x_continuous() +
  annotate("label", label = "Expected proportion\n0.108",
           x= 0.09 , y = 2000, size = 3) +
  annotate("segment", x = 0.099, y = 2000,
           xend = check_prop, yend = 2000,
           arrow = arrow(length = unit(0.1, "cm"), type = "closed"), 
           lineend = "butt", linejoin = "mitre",
           size = 0.6) +
  theme_bw() + 
  theme(
    panel.grid.major = element_line(colour = 'grey85', size = 0.2),
    panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    plot.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust=1),
    #legend.position = c(0.8,0.2), #You can adjust legend position here
    legend.background = element_rect(color = "black"),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8)
  )

chi_hist

dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("chi_hist.png")

We can see that mostly the proportions of significant genes tend to be higher than proportion from the QTL region.

Step G-03: Compare QTL region with samples of equal size ordered by genomic position

In this part we make samples of equal size which are go before the QTL region by genomic position. The region after the QTL has only 516 genes which were already check by the Chisq test. But we would like to know whether there is an interval of 520 genes which could have significantly different proportion of significant genes.

G-03 I: Prepare a dataframe with genes outside of QTL regions arranged be genomic position

# Arrange genes outside of qtl region by genomic position
resultsSortedOutQTL <- resultsOutQTL %>%
  arrange(start)

# Calculate the number of genes before the start of QTL region
check_rows <- sum(resultsSortedOutQTL$start < 12875963)

G-03 II: Run Chisq test for all 520 gene size samples before QTL region

All in all there 206 samples of 520 genes before the QTL region.

Run the loop

# Prepare a dataframe for loop records
df_test_2<- as.data.frame(matrix(nrow = check_rows - 520 + 1, ncol = 3))

df_test_2 <- df_test_2 %>% 
  dplyr::rename(num_sig = V1,
                prop = V2, 
                p.value = V3)

# Loop
for (i in 1:(check_rows-520+1)) {
  df_i2 <- resultsSortedOutQTL[i:(i+519),]
  n_sig2 <- sum(df_i2$num_sig == 1)
  n_unsig2 <- sum(df_i2$num_sig == 0)
  df_test_2$num_sig[i] <- n_sig2
  df_test_2$prop[i] <- n_sig2/nrow(df_i2)
  df_test_2$p.value[i] <- chisq.test(c(n_sig2, n_unsig2), p = c(check_prop, 1 - check_prop))$p.value
}

Get the share of samples where proportion of significant genes differs from the QTL region

sum(df_test_2$p.value < 0.05)
## [1] 0

Result: there are no regions of equal size before the QTL regions which have significantly different number of significant genes.

G-03 III: Histogram of obtained proportions

grDevices::png(filename = "chi_hist_2.png",  width = 12.5, height = 8, units = 'cm', res = 400)

chi_hist_2 <- df_test_2 %>%
  ggplot(aes(x = prop)) +
  geom_histogram(#bins = 15,
                 fill = "#58508d", alpha = 0.9) +
  geom_vline(xintercept = check_prop, color = "red", size = 1) +
  scale_x_continuous(limits = c(0.085,0.13)) +
  annotate("label", label = "Expected proportion\n0.108",
           x= 0.0927 , y = 40, size = 3) +
  annotate("segment", x = 0.099, y = 40,
           xend = check_prop, yend = 40,
           arrow = arrow(length = unit(0.1, "cm"), type = "closed"), 
           lineend = "butt", linejoin = "mitre",
           size = 0.6) +
  theme_bw() + 
  theme(
    panel.grid.major = element_line(colour = 'grey85', size = 0.2),
    panel.grid.minor = element_line(colour = 'grey90', size = 0.1),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    plot.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust=1),
    #legend.position = c(0.8,0.2), #You can adjust legend position here
    legend.background = element_rect(color = "black"),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8)
  )

chi_hist_2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("chi_hist_2.png")

At the same time, we can cleaely see that in all cases the proportions of significant genes a higher, than that of QTL regions.

Part H: Karyoplot

Upload useful packages

library(karyoploteR)
library(Biostrings)

Step H-01: Prepare data

Upload lyrata_genome

lyr.chr <- fasta.index("lyrata_genome.fa") #%>% head(5)
typeof(lyr.chr)
## [1] "list"

select required rows and then add a mutant column

lyr.chr <- lyr.chr %>% 
  dplyr::select(desc, seqlength) %>%
  dplyr::mutate(chrom = base::strsplit(desc, " "))

Select columns

# only select the first element of the mutant column
lyr.chr$chrom <- base::sapply(lyr.chr$chrom, `[`, 1)

# again select only required columns
lyr.chr <- lyr.chr %>% 
  dplyr::select(chrom, seqlength)

replace the word “scaffold_” with null string

lyr.chr <- data.frame(lapply(lyr.chr, function(x) {
  gsub("scaffold_", "", x)
}))

the values in chrom are stored as factor -> convert it into character and numeric

lyr.chr$chrom <- as.numeric(as.character(lyr.chr$chrom))
lyr.chr$seqlength <- as.numeric(as.character(lyr.chr$seqlength))

only select chromosome from 1-9

lyr.chr <- lyr.chr %>%
  filter(as.numeric(chrom) < 9) %>% 
  arrange(chrom)

add prefix to the chromosome number

lyr.chr$chrom <- base::paste0("chr", lyr.chr$chrom)

Step H-02: Plot the choromosome using karyoploteR

Create custom GenomeRanges for lyrata

lyr.custom.genome <- toGRanges(data.frame(
  chr = c(lyr.chr$chrom), start = rep(1, length(lyr.chr$chrom)),
  end = c(lyr.chr$seqlength)
))
lyr.custom.genome
## GRanges object with 8 ranges and 0 metadata columns:
##     seqnames     ranges strand
##        <Rle>  <IRanges>  <Rle>
##   1     chr1 1-33132539      *
##   2     chr2 1-19320864      *
##   3     chr3 1-24464547      *
##   4     chr4 1-23328337      *
##   5     chr5 1-21221946      *
##   6     chr6 1-25113588      *
##   7     chr7 1-24649197      *
##   8     chr8 1-22951293      *
##   -------
##   seqinfo: 8 sequences from an unspecified genome; no seqlengths

prepare cytobands from GFF file - make GRanges of available trancription database

gffRangedGenome.aly <- rtracklayer::import.gff("data/lyrata1.to9.Rawat.gff")
gffRangedGenome.aly %>% head(5)
## GRanges object with 5 ranges and 8 metadata columns:
##       seqnames    ranges strand |    source       type     score     phase
##          <Rle> <IRanges>  <Rle> |  <factor>   <factor> <numeric> <integer>
##   [1]     chr1    1-2541      - | version-2 gene            0.43      <NA>
##   [2]     chr1    1-2541      - | version-2 transcript      0.43      <NA>
##   [3]     chr1    62-144      - | version-2 intron          0.96      <NA>
##   [4]     chr1   253-406      - | version-2 intron          1.00      <NA>
##   [5]     chr1  783-1422      - | version-2 intron          1.00      <NA>
##                 ID        Name                Note          Parent
##        <character> <character>     <CharacterList> <CharacterList>
##   [1]    AL1G10010   AL1G10010 Protein_Coding_gene                
##   [2] AL1G10010.t1        <NA>                           AL1G10010
##   [3] AL1G10010.t1        <NA>                           AL1G10010
##   [4] AL1G10010.t1        <NA>                           AL1G10010
##   [5] AL1G10010.t1        <NA>                           AL1G10010
##   -------
##   seqinfo: 9 sequences from an unspecified genome; no seqlengths

creating cytobands

aly.cytoBands <- as.data.frame(gffRangedGenome.aly) %>%
  dplyr::filter(type == "gene") %>%
  dplyr::select(seqnames, start, end, strand, width, Name) 

  # convert cytobands to GRanges object
aly.cytoBands <- toGRanges(aly.cytoBands)

plot custom genome

kp.lyr <- plotKaryotype(genome = lyr.custom.genome)

library(AnnotationDbi)
library(GenomicFeatures)
txdb.aly <- makeTxDbFromGFF("data/lyrata1.to9.Rawat.gff", format = "gff",
                            dataSource = "Rawat et. al",
                            organism = "Arabidopsis lyrata")
## Import genomic features from the file as a GRanges object ... OK
## Prepare the 'metadata' data frame ... OK
## Make the TxDb object ...
## Warning in .get_cds_IDX(mcols0$type, mcols0$phase): The "phase" metadata column contains non-NA values for features of type
##   stop_codon. This information was ignored.
## Warning in makeTxDbFromGRanges(gr, metadata = metadata): The following transcripts were dropped because their exon ranks could
##   not be inferred (either because the exons are not on the same
##   chromosome/strand or because they are not separated by introns):
##   AL1G10010, AL1G10020, AL1G10030, AL1G10040, AL1G10050, AL1G10060,
##   AL1G10070, AL1G10080, AL1G10090, AL1G10100, AL1G10110, AL1G10120,
##   AL1G10130, AL1G10140, AL1G10150, AL1G10160, AL1G10170, AL1G10180,
##   AL1G10190, AL1G10200, AL1G10210, AL1G10220, AL1G10230, AL1G10240,
##   AL1G10250, AL1G10260, AL1G10270, AL1G10280, AL1G10290, AL1G10300,
##   AL1G10310, AL1G10320, AL1G10330, AL1G10340, AL1G10350, AL1G10360,
##   AL1G10370, AL1G10380, AL1G10390, AL1G10400, AL1G10410, AL1G10420,
##   AL1G10430, AL1G10450, AL1G10470, AL1G10480, AL1G10490, AL1G10500,
##   AL1G10510, AL1G10520, AL1G10530, AL1G10540, AL1G10550, AL1G10560,
##   AL1G10570, AL1G10580, AL1G10590, AL1G10600, AL1G10610, AL1G10620,
##   AL1G10630, AL1G10640, AL1G10650, AL1G10660, AL1G10670, AL1G10680,
##   AL1G10690, AL1G10700, AL1G10710, AL1G10720, AL1G10740, AL1G10750,
##   AL1G10760, AL1G10770, AL1G10780, AL1G10790, AL1G10800, AL1G10810,
##   AL1G10820, AL1G10830, AL1G10840, AL1G10850, AL1G10860, AL1G10870,
##   AL1G10880, AL1G10890, AL1G10900, AL1G10910, AL1G10920, AL1G10930,
##   AL1G10940, AL1G10950, AL1G10960, AL1G10970, AL1G10980, AL1G10990,
##   AL1G11000, AL1G11010, AL1G11020, AL1G11030, AL1G11040, AL1G11050,
##   AL1G11060, AL1G11070, AL1G11080, AL1G11090, AL1G11100, AL1G11110,
##   AL1G11120, AL1G11130, AL1G11140, AL1G11150, AL1G11160, AL1G11170,
##   AL1G11180, AL1G11200, AL1G11210, AL1G11220, AL1G11230, AL1G11240,
##   AL1G11250, AL1G11260, AL1G11270, AL1G11280, AL1G11290, AL1G11310,
##   AL1G11320, AL1G11330, AL1G11340, AL1G11350, AL1G11360, AL1G11370,
##   AL1G11380, AL1G11390, AL1G11400, AL1G11410, AL1G11420, AL1G11430,
##   AL1G11440, AL1G11450, AL1G11460, AL1G11470, AL1G11480, AL1G11500,
##   AL1G11510, AL1G11520, AL1G11530, AL1G11540, AL1G11550, AL1G11560,
##   AL1G11570, AL1G11580, AL1G11590, AL1G11600, AL1G11610, AL1G11620,
##   AL1G11630, AL1G11640, AL1G11650, AL1G11660, AL1G11670, AL1G11680,
##   AL1G11690, AL1G11700, AL1G11710, AL1G11720, AL1G11730, AL1G11740,
##   AL1G11750, AL1G11760, AL1G11770, AL1G11780, AL1G11790, AL1G11800,
##   AL1G11810, AL1G11820, AL1G11840, AL1G11850, AL1G11860, AL1G11870,
##   AL1G11880, AL1G11890, AL1G11900, AL1G11910, AL1G11930, AL1G11940,
##   AL1G11960, AL1G11970, AL1G11980, AL1G11990, AL1G12000, AL1G12010,
##   AL1G12020, AL1G12030, AL1G12040, AL1G12050, AL1G12060, AL1G12070,
##   AL1G12080, AL1G12090, AL1G12100, AL1G12110, AL1G12120, AL1G12130,
##   AL1G12150, AL1G12160, AL1G12170, AL1G12180, AL1G12190, AL1G12200,
##   AL1G12210, AL1G12220, AL1G12230, AL1G12240, AL1G12260, AL1G12270,
##   AL1G12280, AL1G12290, AL1G12300, AL1G12310, AL1G12320, AL1G12330,
##   AL1G12340, AL1G12350, AL1G12360, AL1G12370, AL1G12380, AL1G12390,
##   AL1G12400, AL1G12410, AL1G12420, AL1G12430, AL1G12440, AL1G12450,
##   AL1G12460, AL1G12470, AL1G12480, AL1G12490, AL1G12500, AL1G12510,
##   AL1G12520, AL1G12530, AL1G12540, AL1G12550, AL1G12560, AL1G12570,
##   AL1G12580, AL1G12590, AL1G12600, AL1G12610, AL1G12620, AL1G12630,
##   AL1G12640, AL1G12650, AL1G12660, AL1G12670, AL1G12680, AL1G12690,
##   AL1G12700, AL1G12710, AL1G12720, AL1G12730, AL1G12740, AL1G12750,
##   AL1G12760, AL1G12770, AL1G12780, AL1G12790, AL1G12800, AL1G12810,
##   AL1G12820, AL1G12830, AL1G12840, AL1G12850, AL1G12860, AL1G12870,
##   AL1G12880, AL1G12890, AL1G12900, AL1G12910, AL1G12920, AL1G12930,
##   AL1G12940, AL1G12950, AL1G12960, AL1G12970, AL1G12980, AL1G12990,
##   AL1G13000, AL1G13010, AL1G13030, AL1G13040, AL1G13050, AL1G13060,
##   AL1G13070, AL1G13090, AL1G13100, AL1G13110, AL1G13120, AL1G13130,
##   AL1G13150, AL1G13160, AL1G13170, AL1G13180, AL1G13190, AL1G13200,
##   AL1G13210, AL1G13220, AL1G13230, AL1G13240, AL1G13250, AL1G13260,
##   AL1G13270, AL1G13280, AL1G13290, AL1G13300, AL1G13310, AL1G13320,
##   AL1G13330, AL1G13340, AL1G13350, AL1G13360, AL1G13370, AL1G13380,
##   AL1G13390, AL1G13400, AL1G13410, AL1G13430, AL1G13440, AL1G13450,
##   AL1G13460, AL1G13470, AL1G13480, AL1G13490, AL1G13500, AL1G13510,
##   AL1G13520, AL1G13530, AL1G13540, AL1G13550, AL1G13560, AL1G13570,
##   AL1G13580, AL1G13590, AL1G13600, AL1G13610, AL1G13620, AL1G13630,
##   AL1G13640, AL1G13650, AL1G13660, AL1G13670, AL1G13680, AL1G13690,
##   AL1G13700, AL1G13710, AL1G13720, AL1G13730, AL1G13740, AL1G13750,
##   AL1G13760, AL1G13770, AL1G13780, AL1G13790, AL1G13800, AL1G13810,
##   AL1G13820, AL1G13830, AL1G13840, AL1G13850, AL1G13860, AL1G13870,
##   AL1G13880, AL1G13890, AL1G13900, AL1G13910, AL1G13930, AL1G13940,
##   AL1G13950, AL1G13960, AL1G13970, AL1G13980, AL1G13990, AL1G14000,
##   AL1G14010, AL1G14020, AL1G14030, AL1G14040, AL1G14050, AL1G14060,
##   AL1G14070, AL1G14080, AL1G14090, AL1G14100, AL1G14110, AL1G14120,
##   AL1G14130, AL1G14140, AL1G14150, AL1G14160, AL1G14170, AL1G14180,
##   AL1G14190, AL1G14200, AL1G14210, AL1G14220, AL1G14230, AL1G14240,
##   AL1G14250, AL1G14260, AL1G14270, AL1G14280, AL1G14290, AL1G14300,
##   AL1G14310, AL1G14320, AL1G14330, AL1G14340, AL1G14350, AL1G14360,
##   AL1G14370, AL1G14380, AL1G14390, AL1G14400, AL1G14410, AL1G14420,
##   AL1G14430, AL1G14450, AL1G14460, AL1G14470, AL1G14480, AL1G14490,
##   AL1G14500, AL1G14510, AL1G14520, AL1G14530, AL1G14540, AL1G14550,
##   AL1G14560, AL1G14570, AL1G14580, AL1G14590, AL1G14600, AL1G14610,
##   AL1G14620, AL1G14630, AL1G14640, AL1G14650, AL1G14660, AL1G14670,
##   AL1G14680, AL1G14690, AL1G14700, AL1G14710, AL1G14720, AL1G14730,
##   AL1G14740, AL1G14750, AL1G14760, AL1G14770, AL1G14780, AL1G14790,
##   AL1G14800, AL1G14810, AL1G14820, AL1G14830, AL1G14840, AL1G14850,
##   AL1G14860, AL1G14870, AL1G14880, AL1G14890, AL1G14900, AL1G14910,
##   AL1G14920, AL1G14930, AL1G14940, AL1G14950, AL1G14960, AL1G14970,
##   AL1G14980, AL1G14990, AL1G15000, AL1G15010, AL1G15020, AL1G15030,
##   AL1G15040, AL1G15050, AL1G15060, AL1G15070, AL1G15080, AL1G15090,
##   AL1G15100, AL1G15110, AL1G15120, AL1G15130, AL1G15140, AL1G15150,
##   AL1G15160, AL1G15170, AL1G15180, AL1G15190, AL1G15210, AL1G15220,
##   AL1G15240, AL1G15250, AL1G15260, AL1G15270, AL1G15280, AL1G15290,
##   AL1G15300, AL1G15310, AL1G15320, AL1G15330, AL1G15340, AL1G15350,
##   AL1G15360, AL1G15370, AL1G15380, AL1G15390, AL1G15400, AL1G15410,
##   AL1G15420, AL1G15430, AL1G15440, AL1G15450, AL1G15460, AL1G15470,
##   AL1G15480, AL1G15490, AL1G15500, AL1G15510, AL1G15520, AL1G15530,
##   AL1G15540, AL1G15550, AL1G15560, AL1G15570, AL1G15580, AL1G15590,
##   AL1G15600, AL1G15610, AL1G15620, AL1G15630, AL1G15640, AL1G15650,
##   AL1G15660, AL1G15670, AL1G15680, AL1G15700, AL1G15710, AL1G15720,
##   AL1G15730, AL1G15740, AL1G15750, AL1G15760, AL1G15770, AL1G15780,
##   AL1G15790, AL1G15800, AL1G15810, AL1G15820, AL1G15830, AL1G15840,
##   AL1G15850, AL1G15860, AL1G15870, AL1G15880, AL1G15890, AL1G15900,
##   AL1G15910, AL1G15920, AL1G15940, AL1G15950, AL1G15960, AL1G15970,
##   AL1G15980, AL1G15990, AL1G16000, AL1G16010, AL1G16020, AL1G16030,
##   AL1G16040, AL1G16050, AL1G16060, AL1G16070, AL1G16080, AL1G16100,
##   AL1G16110, AL1G16120, AL1G16130, AL1G16140, AL1G16150, AL1G16160,
##   AL1G16170, AL1G16180, AL1G16190, AL1G16200, AL1G16210, AL1G16220,
##   AL1G16230, AL1G16240, AL1G16250, AL1G16260, AL1G16270, AL1G16280,
##   AL1G16290, AL1G16300, AL1G16310, AL1G16320, AL1G16330, AL1G16340,
##   AL1G16350, AL1G16360, AL1G16370, AL1G16380, AL1G16390, AL1G16400,
##   AL1G16410, AL1G16420, AL1G16430, AL1G16440, AL1G16450, AL1G16460,
##   AL1G16470, AL1G16480, AL1G16490, AL1G16510, AL1G16520, AL1G16530,
##   AL1G16540, AL1G16550, AL1G16560, AL1G16570, AL1G16580, AL1G16590,
##   AL1G16600, AL1G16610, AL1G16620, AL1G16630, AL1G16640, AL1G16650,
##   AL1G16660, AL1G16670, AL1G16680, AL1G16690, AL1G16700, AL1G16710,
##   AL1G16720, AL1G16730, AL1G16740, AL1G16750, AL1G16760, AL1G16770,
##   AL1G16780, AL1G16790, AL1G16800, AL1G16810, AL1G16820, AL1G16830,
##   AL1G16840, AL1G16850, AL1G16860, AL1G16870, AL1G16890, AL1G16910,
##   AL1G16920, AL1G16930, AL1G16940, AL1G16950, AL1G16960, AL1G16970,
##   AL1G16980, AL1G16990, AL1G17000, AL1G17010, AL1G17020, AL1G17030,
##   AL1G17040, AL1G17050, AL1G17060, AL1G17070, AL1G17080, AL1G17090,
##   AL1G17100, AL1G17110, AL1G17120, AL1G17140, AL1G17150, AL1G17160,
##   AL1G17180, AL1G17190, AL1G17200, AL1G17210, AL1G17230, AL1G17240,
##   AL1G17250, AL1G17260, AL1G17270, AL1G17280, AL1G17290, AL1G17300,
##   AL1G17310, AL1G17320, AL1G17330
## OK
txdb.aly
## TxDb object:
## # Db type: TxDb
## # Supporting package: GenomicFeatures
## # Data source: Rawat et. al
## # Organism: Arabidopsis lyrata
## # Taxonomy ID: 59689
## # miRBase build ID: NA
## # Genome: NA
## # Nb of transcripts: 32868
## # Db created by: GenomicFeatures package from Bioconductor
## # Creation time: 2022-04-11 19:10:29 +0500 (Mon, 11 Apr 2022)
## # GenomicFeatures version at creation time: 1.46.5
## # RSQLite version at creation time: 2.2.10
## # DBSCHEMAVERSION: 1.2
columns(txdb.aly)
##  [1] "CDSCHROM"   "CDSEND"     "CDSID"      "CDSNAME"    "CDSPHASE"  
##  [6] "CDSSTART"   "CDSSTRAND"  "EXONCHROM"  "EXONEND"    "EXONID"    
## [11] "EXONNAME"   "EXONRANK"   "EXONSTART"  "EXONSTRAND" "GENEID"    
## [16] "TXCHROM"    "TXEND"      "TXID"       "TXNAME"     "TXSTART"   
## [21] "TXSTRAND"   "TXTYPE"
summary(txdb.aly)
## Length  Class   Mode 
##      1   TxDb     S4
metadata(txdb.aly)
##                                        name
## 1                                   Db type
## 2                        Supporting package
## 3                               Data source
## 4                                  Organism
## 5                               Taxonomy ID
## 6                          miRBase build ID
## 7                                    Genome
## 8                         Nb of transcripts
## 9                             Db created by
## 10                            Creation time
## 11 GenomicFeatures version at creation time
## 12         RSQLite version at creation time
## 13                          DBSCHEMAVERSION
##                                           value
## 1                                          TxDb
## 2                               GenomicFeatures
## 3                                  Rawat et. al
## 4                            Arabidopsis lyrata
## 5                                         59689
## 6                                          <NA>
## 7                                          <NA>
## 8                                         32868
## 9     GenomicFeatures package from Bioconductor
## 10 2022-04-11 19:10:29 +0500 (Mon, 11 Apr 2022)
## 11                                       1.46.5
## 12                                       2.2.10
## 13                                          1.2
head(seqlevels(txdb.aly), 15)
## [1] "chr1" "chr2" "chr3" "chr4" "chr5" "chr6" "chr7" "chr8" "Chr9"

extract the GRanges by geneID

genes.aly <- genes(txdb.aly)
genes.aly
## GRanges object with 30142 ranges and 1 metadata column:
##             seqnames          ranges strand |     gene_id
##                <Rle>       <IRanges>  <Rle> | <character>
##   AL1G10010     chr1          1-2541      - |   AL1G10010
##   AL1G10020     chr1       3306-6161      - |   AL1G10020
##   AL1G10040     chr1      9476-10535      + |   AL1G10040
##   AL1G10050     chr1     10548-11941      - |   AL1G10050
##   AL1G10060     chr1     12235-13421      - |   AL1G10060
##         ...      ...             ...    ... .         ...
##   AL9U12200     Chr9 1791630-1793287      + |   AL9U12200
##   AL9U12210     Chr9 1792941-1795545      - |   AL9U12210
##   AL9U12220     Chr9 1796870-1801565      - |   AL9U12220
##   AL9U12230     Chr9 1810180-1812874      + |   AL9U12230
##   AL9U12240     Chr9 1836100-1837535      - |   AL9U12240
##   -------
##   seqinfo: 9 sequences from an unspecified genome; no seqlengths
head(genes.aly,15)
## GRanges object with 15 ranges and 1 metadata column:
##             seqnames      ranges strand |     gene_id
##                <Rle>   <IRanges>  <Rle> | <character>
##   AL1G10010     chr1      1-2541      - |   AL1G10010
##   AL1G10020     chr1   3306-6161      - |   AL1G10020
##   AL1G10040     chr1  9476-10535      + |   AL1G10040
##   AL1G10050     chr1 10548-11941      - |   AL1G10050
##   AL1G10060     chr1 12235-13421      - |   AL1G10060
##         ...      ...         ...    ... .         ...
##   AL1G10120     chr1 39834-42611      - |   AL1G10120
##   AL1G10130     chr1 42668-46181      - |   AL1G10130
##   AL1G10140     chr1 46346-48809      + |   AL1G10140
##   AL1G10160     chr1 49457-51261      - |   AL1G10160
##   AL1G10170     chr1 51346-52761      - |   AL1G10170
##   -------
##   seqinfo: 9 sequences from an unspecified genome; no seqlengths

make a copy

result.withGeneID.asDF.withRowNames <- result.withGeneMap.asDF

select gene_ID as rowNames - this helps in merging the data

rownames(result.withGeneID.asDF.withRowNames) <- 
  result.withGeneID.asDF.withRowNames$gene_ID 

Merge the data

mcols(genes.aly) <- result.withGeneID.asDF.withRowNames[
  names(genes.aly), 
  c("log2FoldChange", "stat", "pvalue", "padj", 
    "gene_ID", "gene_Name", "Significance")]

Add more data

genes.aly.ordered <- genes.aly[base::order(genes.aly$padj, na.last = TRUE), ]

filtered.genes.aly <- genes.aly[!is.na(genes.aly$padj)]
log.pval <- -log10(filtered.genes.aly$padj)
mcols(filtered.genes.aly)$log.pval <- log.pval

sign.genes <- filtered.genes.aly[filtered.genes.aly$padj < 0.05,]
interest.genes <- filtered.genes.aly[filtered.genes.aly$gene_Name %in% list("PIN1", "PIN3", "BRC2")]

top.genes <- genes.aly.ordered[1:6]

filtered.genes.aly[filtered.genes.aly$gene_Name == "BRC2"]
## GRanges object with 0 ranges and 8 metadata columns:
##    seqnames    ranges strand | log2FoldChange      stat    pvalue      padj
##       <Rle> <IRanges>  <Rle> |      <numeric> <numeric> <numeric> <numeric>
##        gene_ID   gene_Name Significance  log.pval
##    <character> <character>  <character> <numeric>
##   -------
##   seqinfo: 9 sequences from an unspecified genome; no seqlengths
fc.ymax <- ceiling(max(abs(range(sign.genes$log2FoldChange))))
fc.ymin <- -fc.ymax

cex.val <- sqrt(sign.genes$log.pval)/3

points.top <- 0.8

# To color the dots
col.over <- "#E63946"
col.under <- "#457b92"

sign.col <- rep(col.over, length(sign.genes))
sign.col[sign.genes$log2FoldChange<0] <- col.under

gene.mean <- start(top.genes) + (end(top.genes) - start(top.genes))/2

Step H-03: Plot Karyoplot

grDevices::png(filename = "kp_lyr.png",  width = 16.5, height = 10, units = 'cm', res = 400)



# Base Karyoplot
kp.lyr <- plotKaryotype(
  genome= lyr.custom.genome, cytobands = aly.cytoBands,
  chromosomes = "chr2",
  plot.type=2)
## Warning in ideogram.plotter(kp, ...): No 'gieStain' column found in cytobands.
## Using 'gpos50' (gray) for all of them
# Add points
kp.lyr <- kpPoints(kp.lyr, data=sign.genes, y=sign.genes$log2FoldChange, cex=cex.val, ymax=fc.ymax, ymin=fc.ymin, col=sign.col,
         #r1=points.top)
)

# Add y-axis
kpAxis(kp.lyr, ymax=fc.ymax, ymin=fc.ymin, 
       r1=points.top, cex = 0.8
)

# Add name of y label axis
kpAddLabels(kp.lyr, labels = "log2 Fold Change", srt=90, pos=1, label.margin = 0.07, ymax=fc.ymax, ymin=fc.ymin,
            r1=points.top, cex = 0.8
)
## Warning in text.default(x = x, y = y, labels = labels, pos = pos, offset = 0, :
## "ymax" is not a graphical parameter
## Warning in text.default(x = x, y = y, labels = labels, pos = pos, offset = 0, :
## "ymin" is not a graphical parameter
gene.mean <- start(top.genes) + (end(top.genes) - start(top.genes))/2


kpSegments(kp.lyr, chr=as.character(seqnames(top.genes)), x0=gene.mean, x1=gene.mean, y0=top.genes$log2FoldChange, y1=fc.ymax, ymax=fc.ymax, ymin=fc.ymin,
           r1=points.top
           )

# Text markers
kpPlotMarkers(kp.lyr, top.genes, labels = names(top.genes), text.orientation = "horizontal", 
              r0=points.top, label.dist = 0.003, cex = 0.8
              )

kpPlotDensity(kp.lyr, data=filtered.genes.aly, window.size = 10e4, data.panel = 2,
                    r1 = 0.8)

kp.lyr
## $plot.params
## $plot.params$leftmargin
## [1] 0.1
## 
## $plot.params$rightmargin
## [1] 0.05
## 
## $plot.params$topmargin
## [1] 120
## 
## $plot.params$bottommargin
## [1] 100
## 
## $plot.params$ideogramheight
## [1] 50
## 
## $plot.params$ideogramlateralmargin
## [1] 0
## 
## $plot.params$data1height
## [1] 200
## 
## $plot.params$data1inmargin
## [1] 20
## 
## $plot.params$data1outmargin
## [1] 20
## 
## $plot.params$data1min
## [1] 0
## 
## $plot.params$data1max
## [1] 1
## 
## $plot.params$data2height
## [1] 200
## 
## $plot.params$data2inmargin
## [1] 20
## 
## $plot.params$data2outmargin
## [1] 20
## 
## $plot.params$data2min
## [1] 0
## 
## $plot.params$data2max
## [1] 1
## 
## $plot.params$dataideogrammin
## [1] 0
## 
## $plot.params$dataideogrammax
## [1] 1
## 
## $plot.params$dataallmin
## [1] 0
## 
## $plot.params$dataallmax
## [1] 1
## 
## 
## $genome.name
## [1] "custom"
## 
## $genome
## GRanges object with 1 range and 0 metadata columns:
##        seqnames     ranges strand
##           <Rle>  <IRanges>  <Rle>
##   chr2     chr2 1-19320864      *
##   -------
##   seqinfo: 1 sequence from an unspecified genome; no seqlengths
## 
## $cytobands
## GRanges object with 32285 ranges and 3 metadata columns:
##         seqnames          ranges strand |   strand     width        Name
##            <Rle>       <IRanges>  <Rle> | <factor> <integer> <character>
##       1     chr1          1-2541      * |        -      2541   AL1G10010
##       2     chr1       3306-6161      * |        -      2856   AL1G10020
##       3     chr1       7429-7630      * |        +       202   AL1G10030
##       4     chr1      9476-10535      * |        +      1060   AL1G10040
##       5     chr1     10548-11941      * |        -      1394   AL1G10050
##     ...      ...             ...    ... .      ...       ...         ...
##   32281     Chr9 1791630-1793287      * |        +      1658   AL9U12200
##   32282     Chr9 1792941-1795545      * |        -      2605   AL9U12210
##   32283     Chr9 1796870-1801565      * |        -      4696   AL9U12220
##   32284     Chr9 1810180-1812874      * |        +      2695   AL9U12230
##   32285     Chr9 1836100-1837535      * |        -      1436   AL9U12240
##   -------
##   seqinfo: 9 sequences from an unspecified genome; no seqlengths
## 
## $plot.type
## [1] 2
## 
## $plot.region
## GRanges object with 1 range and 0 metadata columns:
##        seqnames     ranges strand
##           <Rle>  <IRanges>  <Rle>
##   chr2     chr2 1-19320864      *
##   -------
##   seqinfo: 1 sequence from an unspecified genome; no seqlengths
## 
## $zoom
## [1] FALSE
## 
## $chromosomes
## [1] "chr2"
## 
## $chromosome.lengths
##     chr2 
## 19320864 
## 
## $coord.change.function
## function (chr = NULL, x = NULL, y = NULL, data.panel = NULL) 
## {
##     if (is.null(data.panel)) 
##         stop("In coordChangeFunction: data.panel is required")
##     if (!is.null(y)) {
##         if (is.null(chr)) {
##             stop("If y is not NULL, chr must be specified too")
##         }
##         if (length(y) != length(chr)) {
##             stop("If y is not NULL, it have to have the same length as chr")
##         }
##     }
##     return(genomic2plot(chr = chr, x = x, y = y, data.panel = data.panel, 
##         genome = genome, plot.params = plot.params))
## }
## <bytecode: 0x7fa80a67a6a0>
## <environment: 0x7fa80aeb7778>
## 
## $ideogram.mid
## function (chr) 
## {
##     return(ideoMid(chr = chr, genome = genome, plot.params = plot.params))
## }
## <bytecode: 0x7fa80a673938>
## <environment: 0x7fa80aeb7778>
## 
## $chromosome.height
## [1] 530
## 
## $graphical.par
## $graphical.par$old.par
## $graphical.par$old.par$xlog
## [1] FALSE
## 
## $graphical.par$old.par$ylog
## [1] FALSE
## 
## $graphical.par$old.par$adj
## [1] 0.5
## 
## $graphical.par$old.par$ann
## [1] TRUE
## 
## $graphical.par$old.par$ask
## [1] FALSE
## 
## $graphical.par$old.par$bg
## [1] "white"
## 
## $graphical.par$old.par$bty
## [1] "o"
## 
## $graphical.par$old.par$cex
## [1] 1
## 
## $graphical.par$old.par$cex.axis
## [1] 1
## 
## $graphical.par$old.par$cex.lab
## [1] 1
## 
## $graphical.par$old.par$cex.main
## [1] 1.2
## 
## $graphical.par$old.par$cex.sub
## [1] 1
## 
## $graphical.par$old.par$col
## [1] "black"
## 
## $graphical.par$old.par$col.axis
## [1] "black"
## 
## $graphical.par$old.par$col.lab
## [1] "black"
## 
## $graphical.par$old.par$col.main
## [1] "black"
## 
## $graphical.par$old.par$col.sub
## [1] "black"
## 
## $graphical.par$old.par$crt
## [1] 0
## 
## $graphical.par$old.par$err
## [1] 0
## 
## $graphical.par$old.par$family
## [1] ""
## 
## $graphical.par$old.par$fg
## [1] "black"
## 
## $graphical.par$old.par$fig
## [1] 0 1 0 1
## 
## $graphical.par$old.par$fin
## [1] 6.496063 3.937008
## 
## $graphical.par$old.par$font
## [1] 1
## 
## $graphical.par$old.par$font.axis
## [1] 1
## 
## $graphical.par$old.par$font.lab
## [1] 1
## 
## $graphical.par$old.par$font.main
## [1] 2
## 
## $graphical.par$old.par$font.sub
## [1] 1
## 
## $graphical.par$old.par$lab
## [1] 5 5 7
## 
## $graphical.par$old.par$las
## [1] 0
## 
## $graphical.par$old.par$lend
## [1] "round"
## 
## $graphical.par$old.par$lheight
## [1] 1
## 
## $graphical.par$old.par$ljoin
## [1] "round"
## 
## $graphical.par$old.par$lmitre
## [1] 10
## 
## $graphical.par$old.par$lty
## [1] "solid"
## 
## $graphical.par$old.par$lwd
## [1] 1
## 
## $graphical.par$old.par$mai
## [1] 1.02 0.82 0.82 0.42
## 
## $graphical.par$old.par$mar
## [1] 5.1 4.1 4.1 2.1
## 
## $graphical.par$old.par$mex
## [1] 1
## 
## $graphical.par$old.par$mfcol
## [1] 1 1
## 
## $graphical.par$old.par$mfg
## [1] 1 1 1 1
## 
## $graphical.par$old.par$mfrow
## [1] 1 1
## 
## $graphical.par$old.par$mgp
## [1] 3 1 0
## 
## $graphical.par$old.par$mkh
## [1] 0.001
## 
## $graphical.par$old.par$new
## [1] FALSE
## 
## $graphical.par$old.par$oma
## [1] 0 0 0 0
## 
## $graphical.par$old.par$omd
## [1] 0 1 0 1
## 
## $graphical.par$old.par$omi
## [1] 0 0 0 0
## 
## $graphical.par$old.par$pch
## [1] 1
## 
## $graphical.par$old.par$pin
## [1] 5.256063 2.097008
## 
## $graphical.par$old.par$plt
## [1] 0.1262303 0.9353455 0.2590800 0.7917200
## 
## $graphical.par$old.par$ps
## [1] 12
## 
## $graphical.par$old.par$pty
## [1] "m"
## 
## $graphical.par$old.par$smo
## [1] 1
## 
## $graphical.par$old.par$srt
## [1] 0
## 
## $graphical.par$old.par$tck
## [1] NA
## 
## $graphical.par$old.par$tcl
## [1] -0.5
## 
## $graphical.par$old.par$usr
## [1] 0 1 0 1
## 
## $graphical.par$old.par$xaxp
## [1] 0 1 5
## 
## $graphical.par$old.par$xaxs
## [1] "r"
## 
## $graphical.par$old.par$xaxt
## [1] "s"
## 
## $graphical.par$old.par$xpd
## [1] FALSE
## 
## $graphical.par$old.par$yaxp
## [1] 0 1 5
## 
## $graphical.par$old.par$yaxs
## [1] "r"
## 
## $graphical.par$old.par$yaxt
## [1] "s"
## 
## $graphical.par$old.par$ylbias
## [1] 0.2
## 
## 
## $graphical.par$new.par
## $graphical.par$new.par$xlog
## [1] FALSE
## 
## $graphical.par$new.par$ylog
## [1] FALSE
## 
## $graphical.par$new.par$adj
## [1] 0.5
## 
## $graphical.par$new.par$ann
## [1] TRUE
## 
## $graphical.par$new.par$ask
## [1] FALSE
## 
## $graphical.par$new.par$bg
## [1] "white"
## 
## $graphical.par$new.par$bty
## [1] "o"
## 
## $graphical.par$new.par$cex
## [1] 1
## 
## $graphical.par$new.par$cex.axis
## [1] 1
## 
## $graphical.par$new.par$cex.lab
## [1] 1
## 
## $graphical.par$new.par$cex.main
## [1] 1.2
## 
## $graphical.par$new.par$cex.sub
## [1] 1
## 
## $graphical.par$new.par$col
## [1] "black"
## 
## $graphical.par$new.par$col.axis
## [1] "black"
## 
## $graphical.par$new.par$col.lab
## [1] "black"
## 
## $graphical.par$new.par$col.main
## [1] "black"
## 
## $graphical.par$new.par$col.sub
## [1] "black"
## 
## $graphical.par$new.par$crt
## [1] 0
## 
## $graphical.par$new.par$err
## [1] 0
## 
## $graphical.par$new.par$family
## [1] ""
## 
## $graphical.par$new.par$fg
## [1] "black"
## 
## $graphical.par$new.par$fig
## [1] 0 1 0 1
## 
## $graphical.par$new.par$fin
## [1] 6.496063 3.937008
## 
## $graphical.par$new.par$font
## [1] 1
## 
## $graphical.par$new.par$font.axis
## [1] 1
## 
## $graphical.par$new.par$font.lab
## [1] 1
## 
## $graphical.par$new.par$font.main
## [1] 2
## 
## $graphical.par$new.par$font.sub
## [1] 1
## 
## $graphical.par$new.par$lab
## [1] 5 5 7
## 
## $graphical.par$new.par$las
## [1] 0
## 
## $graphical.par$new.par$lend
## [1] "round"
## 
## $graphical.par$new.par$lheight
## [1] 1
## 
## $graphical.par$new.par$ljoin
## [1] "round"
## 
## $graphical.par$new.par$lmitre
## [1] 10
## 
## $graphical.par$new.par$lty
## [1] "solid"
## 
## $graphical.par$new.par$lwd
## [1] 1
## 
## $graphical.par$new.par$mai
## [1] 0.02 0.02 0.02 0.02
## 
## $graphical.par$new.par$mar
## [1] 0.1 0.1 0.1 0.1
## 
## $graphical.par$new.par$mex
## [1] 1
## 
## $graphical.par$new.par$mfcol
## [1] 1 1
## 
## $graphical.par$new.par$mfg
## [1] 1 1 1 1
## 
## $graphical.par$new.par$mfrow
## [1] 1 1
## 
## $graphical.par$new.par$mgp
## [1] 3 1 0
## 
## $graphical.par$new.par$mkh
## [1] 0.001
## 
## $graphical.par$new.par$new
## [1] FALSE
## 
## $graphical.par$new.par$oma
## [1] 0 0 0 0
## 
## $graphical.par$new.par$omd
## [1] 0 1 0 1
## 
## $graphical.par$new.par$omi
## [1] 0 0 0 0
## 
## $graphical.par$new.par$pch
## [1] 1
## 
## $graphical.par$new.par$pin
## [1] 6.456063 3.897008
## 
## $graphical.par$new.par$plt
## [1] 0.003078788 0.996921212 0.005080000 0.994920000
## 
## $graphical.par$new.par$ps
## [1] 12
## 
## $graphical.par$new.par$pty
## [1] "m"
## 
## $graphical.par$new.par$smo
## [1] 1
## 
## $graphical.par$new.par$srt
## [1] 0
## 
## $graphical.par$new.par$tck
## [1] NA
## 
## $graphical.par$new.par$tcl
## [1] -0.5
## 
## $graphical.par$new.par$usr
## [1]   0   1   0 750
## 
## $graphical.par$new.par$xaxp
## [1] 0 1 5
## 
## $graphical.par$new.par$xaxs
## [1] "r"
## 
## $graphical.par$new.par$xaxt
## [1] "s"
## 
## $graphical.par$new.par$xpd
## [1] FALSE
## 
## $graphical.par$new.par$yaxp
## [1]   0 700   7
## 
## $graphical.par$new.par$yaxs
## [1] "r"
## 
## $graphical.par$new.par$yaxt
## [1] "s"
## 
## $graphical.par$new.par$ylbias
## [1] 0.2
## 
## 
## 
## $beginKpPlot
## function () 
## {
##     graphics::par(kp$graphical.par$new.par)
## }
## <bytecode: 0x7fa80a7a5820>
## <environment: 0x7fa80b8df1a8>
## 
## $endKpPlot
## function () 
## {
##     graphics::par(kp$graphical.par$old.par)
## }
## <bytecode: 0x7fa80a7a5d98>
## <environment: 0x7fa80b8df1a8>
## 
## $plot
## $plot$xmin
## [1] 0
## 
## $plot$xmax
## [1] 1
## 
## $plot$ymin
## [1] 0
## 
## $plot$ymax
## [1] 750
## 
## 
## attr(,"class")
## [1] "KaryoPlot"
dev.off()
## quartz_off_screen 
##                 2
knitr::include_graphics("kp_lyr.png")